[llvm] 84fe863 - [AMDGPU] Test MAD formation on GFX11
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 04:03:39 PST 2022
Author: Jay Foad
Date: 2022-11-18T12:03:32Z
New Revision: 84fe863a1cf26d67d189df5fcd54b352ef7801a2
URL: https://github.com/llvm/llvm-project/commit/84fe863a1cf26d67d189df5fcd54b352ef7801a2
DIFF: https://github.com/llvm/llvm-project/commit/84fe863a1cf26d67d189df5fcd54b352ef7801a2.diff
LOG: [AMDGPU] Test MAD formation on GFX11
GFX11 has no MAD instructions, only FMA, so this is just testing that we
generate separate MUL and ADD.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/madak.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 46dc93f18cdaf..61881d48f19fb 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,9 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8PLUS,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8PLUS,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX8PLUS,GFX10PLUS-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX8PLUS,GFX10-MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX8PLUS,FMA,GFX10PLUS-FMA %s
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8PLUS,FMA,GFX940-FMA %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX8PLUS,GFX11-MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX8PLUS,FMA,GFX10PLUS-FMA %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -19,7 +20,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GFX10PLUS: {{flat|global}}_load_{{dword|b32}} [[VA:v[0-9]+]]
; GFX10PLUS: {{flat|global}}_load_{{dword|b32}} [[VB:v[0-9]+]]
; MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-; GFX10PLUS-MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
+; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; FMA: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -51,7 +52,7 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add
; GFX6-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GFX8-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GFX6_8_9-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-; GFX10PLUS-MAD-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
+; GFX10-MAD-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; FMA-DAG: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; MAD-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
; GFX10PLUS-FMA-DAG: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VC]], 0x41200000
@@ -84,7 +85,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[VA:v[0-9]+]]
; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-; GFX10PLUS-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
+; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -112,7 +113,7 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
; GFX10PLUS: {{flat|global}}_load_{{dword|b32}} [[VA:v[0-9]+]]
; GFX10PLUS: {{flat|global}}_load_{{dword|b32}} [[VB:v[0-9]+]]
; MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-; GFX10PLUS-MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
+; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
; FMA: v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -136,9 +137,11 @@ define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out
; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}}{{(_addtid)?}} [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
; GFX6_8_9: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
-; GFX10PLUS-MAD: v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
+; GFX10-MAD: v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
; GFX10PLUS-FMA: v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
; GFX940-FMA: v_fmac_f32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
+; GFX11-MAD: v_mul_f32_e32 [[VMUL:v[0-9]+]], [[SB]], [[VA]]
+; GFX11-MAD: v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
@@ -158,9 +161,11 @@ define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float
; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}}{{(_addtid)?}} [[VA:v[0-9]+]]
; GFX6_8_9-NOT: v_madak_f32
; GFX6_8_9: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
-; GFX10PLUS-MAD: v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
+; GFX10-MAD: v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
; GFX10PLUS-FMA: v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
; GFX940-FMA: v_fmac_f32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
+; GFX11-MAD: v_mul_f32_e32 [[VMUL:v[0-9]+]], [[SB]], [[VA]]
+; GFX11-MAD: v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -177,9 +182,11 @@ define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float
; GCN-LABEL: {{^}}s_s_madak_f32:
; GCN-NOT: v_madak_f32
; GFX8_9: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; GFX10PLUS-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; GFX10-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; GFX10PLUS-FMA: v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; GFX940-FMA: v_fmac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
+; GFX11-MAD: v_dual_mov_b32 {{v[0-9]+}}, 0 :: v_dual_add_f32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
@@ -193,9 +200,11 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo
; GFX8PLUS: {{flat|global}}_load_{{dword|b32}} [[VB:v[0-9]+]]
; GFX8PLUS: {{flat|global}}_load_{{dword|b32}} [[VA:v[0-9]+]]
; GFX6_8_9: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
-; GFX10PLUS-MAD: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
+; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
; GFX10PLUS-FMA: v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
; GFX940-FMA: v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{s[0-9]+}}
+; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], |{{v[0-9]+}}|, {{v[0-9]+}}
+; GFX11-MAD: v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -220,9 +229,11 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia
; GFX8PLUS: {{flat|global}}_load_{{dword|b32}} [[VB:v[0-9]+]]
; GFX8PLUS: {{flat|global}}_load_{{dword|b32}} [[VA:v[0-9]+]]
; GFX6_8_9: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
-; GFX10PLUS-MAD: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
+; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
; GFX10PLUS-FMA: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
; GFX940-FMA: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{s[0-9]+}}
+; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], {{v[0-9]+}}, |{{v[0-9]+}}|
+; GFX11-MAD: v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -249,10 +260,13 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia
; GCN: s_load_{{dword|b32}} [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
; MAD: v_mov_b32_e32 [[MADAK:v[0-9]+]], 0x42280000
; MAD: v_mac_f32_e64 [[MADAK]], [[SGPR0]], 0.5
-; GFX10PLUS: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
-; GFX10PLUS-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; GFX10-MAD: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+; GFX10PLUS-FMA: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+; GFX10-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; GFX10PLUS-FMA: v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; GFX940-FMA: v_fmac_f32_e64 [[MADAK:v[0-9]+]], [[SGPR0]], 0.5
+; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], [[SGPR0]], 0.5
+; GFX11-MAD: v_add_f32_e32 [[MADAK:v[0-9]+]], 0x42280000, [[VMUL]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
; GFX6: buffer_store_dword [[MUL]]
; GFX8PLUS: {{flat|global}}_store_{{dword|b32}} v[{{[0-9:]+}}], [[MUL]]
More information about the llvm-commits
mailing list