[llvm] aa77232 - [NFC][AMDGPU] Improve fused fmul+fadd tests.

Thu Jul 30 18:00:31 PDT 2020

Author: dfukalov
Date: 2020-07-31T04:00:09+03:00
New Revision: aa77232a63e3f7ca2f0077434d46e0f269499eae

URL: https://github.com/llvm/llvm-project/commit/aa77232a63e3f7ca2f0077434d46e0f269499eae
DIFF: https://github.com/llvm/llvm-project/commit/aa77232a63e3f7ca2f0077434d46e0f269499eae.diff

LOG: [NFC][AMDGPU] Improve fused fmul+fadd tests.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D84903

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
index 58d61ccaf97b..cd19e237be70 100644

--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -32,6 +32,46 @@ define amdgpu_kernel void @fmuladd_f16(half addrspace(1)* %out, half addrspace(1
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_f16:
+; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; VI-DENORM-CONTRACT: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; GFX10-FLUSH:  v_mul_f16_e32
+; GFX10-FLUSH:  v_add_f16_e32
+; GFX10-DENORM-CONTRACT: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; Kernel under test: a half fmul feeding a half fadd, with no fast-math flags on either.
+define amdgpu_kernel void @fmul_fadd_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
+                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 { ; attribute group #0 is defined outside this hunk
+  %r0 = load half, half addrspace(1)* %in1 ; first multiplicand
+  %r1 = load half, half addrspace(1)* %in2 ; second multiplicand
+  %r2 = load half, half addrspace(1)* %in3 ; addend
+  %mul = fmul half %r0, %r1 ; plain fmul, no fast-math flags
+  %add = fadd half %mul, %r2 ; plain fadd, no fast-math flags
+  store half %add, half addrspace(1)* %out ; the result is stored through %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fmul_fadd_contract_f16:
+; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; The fused forms below are checked under the DENORM prefixes without a -CONTRACT suffix.
+; VI-DENORM: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; GFX10-FLUSH:  v_mul_f16_e32
+; GFX10-FLUSH:  v_add_f16_e32
+; GFX10-DENORM: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; Kernel under test: a half fmul with no flags feeding a half fadd marked 'contract'.
+define amdgpu_kernel void @fmul_fadd_contract_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
+                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 { ; attribute group #0 is defined outside this hunk
+  %r0 = load half, half addrspace(1)* %in1 ; first multiplicand
+  %r1 = load half, half addrspace(1)* %in2 ; second multiplicand
+  %r2 = load half, half addrspace(1)* %in3 ; addend
+  %mul = fmul half %r0, %r1 ; the multiply itself carries no fast-math flags
+  %add = fadd contract half %mul, %r2 ; only the add carries the contract flag
+  store half %add, half addrspace(1)* %out ; the result is stored through %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f16
 ; GCN: {{buffer|flat|global}}_load_ushort [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_ushort [[R2:v[0-9]+]],

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
index 7b38b79c78a1..19862fbf13fe 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -69,6 +69,24 @@ define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspa
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_contract_f32:
+; GCN-FLUSH-FMAC: v_fmac_f32_e32
+; Kernel under test: a float fmul with no flags feeding a float fadd marked 'contract'.
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
+; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32
+
+; GCN-DENORM-FASTFMA: v_fma_f32
+define amdgpu_kernel void @fmul_fadd_contract_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                           float addrspace(1)* %in2, float addrspace(1)* %in3) #0 { ; attribute group #0 is defined outside this hunk
+  %r0 = load volatile float, float addrspace(1)* %in1 ; volatile load; NOTE(review): presumably keeps the three loads separate - confirm
+  %r1 = load volatile float, float addrspace(1)* %in2 ; second multiplicand, also a volatile load
+  %r2 = load volatile float, float addrspace(1)* %in3 ; addend, also a volatile load
+  %mul = fmul float %r0, %r1 ; the multiply itself carries no fast-math flags
+  %add = fadd contract float %mul, %r2 ; only the add carries the contract flag
+  store float %add, float addrspace(1)* %out ; the result is stored through %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32
 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
index 8d91a56ee421..cdc4f3b5f31d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -1,4 +1,4 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICTSI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde  -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde  -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
@@ -33,6 +33,20 @@ define amdgpu_kernel void @fmul_fadd_f64(double addrspace(1)* %out, double addrs
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_contract_f64:
+; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; Kernel under test: a double fmul with no flags feeding a double fadd marked 'contract'; the check above uses only the shared prefix.
+define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                           double addrspace(1)* %in2, double addrspace(1)* %in3) #0 { ; attribute group #0 is defined outside this hunk
+  %r0 = load double, double addrspace(1)* %in1 ; first multiplicand
+  %r1 = load double, double addrspace(1)* %in2 ; second multiplicand
+  %r2 = load double, double addrspace(1)* %in3 ; addend
+  %tmp = fmul double %r0, %r1 ; the multiply itself carries no fast-math flags
+  %r3 = fadd contract double %tmp, %r2 ; only the add carries the contract flag
+  store double %r3, double addrspace(1)* %out ; the result is stored through %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fadd_a_a_b_f64:
 ; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]],
 ; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]],

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
index 95ea266c6eb7..74c91fbea44d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -27,6 +27,39 @@ define amdgpu_kernel void @fmuladd_v2f16(<2 x half> addrspace(1)* %out, <2 x hal
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_v2f16:
+; GFX9-DENORM-STRICT: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; GFX9-DENORM-STRICT: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; Kernel under test: a <2 x half> fmul feeding a <2 x half> fadd, with no fast-math flags on either.
+; GFX9-DENORM-CONTRACT: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
+                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 { ; attribute group #0 is defined outside this hunk
+  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1 ; first multiplicand vector
+  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2 ; second multiplicand vector
+  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3 ; addend vector
+  %r3 = fmul <2 x half> %r0, %r1 ; plain fmul, no fast-math flags
+  %r4 = fadd <2 x half> %r3, %r2 ; plain fadd, no fast-math flags
+  store <2 x half> %r4, <2 x half> addrspace(1)* %out ; the result is stored through %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fmul_fadd_contract_v2f16:
+; GFX9-FLUSH: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; Kernel under test: a <2 x half> fmul with no flags feeding a <2 x half> fadd marked 'contract'.
+; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
+                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 { ; attribute group #0 is defined outside this hunk
+  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1 ; first multiplicand vector
+  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2 ; second multiplicand vector
+  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3 ; addend vector
+  %r3 = fmul <2 x half> %r0, %r1 ; the multiply itself carries no fast-math flags
+  %r4 = fadd contract <2 x half> %r3, %r2 ; only the add carries the contract flag
+  store <2 x half> %r4, <2 x half> addrspace(1)* %out ; the result is stored through %out
+  ret void
+}
+
+
 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_v2f16:
 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],