[llvm] c6c4f54 - [AMDGPU] Add gfx11 runline to omod test. NFC
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 7 08:41:09 PST 2022
Author: Joe Nash
Date: 2022-12-07T11:40:39-05:00
New Revision: c6c4f54e6bf71b9e35a25c36d7c8ae59b636f3cc
URL: https://github.com/llvm/llvm-project/commit/c6c4f54e6bf71b9e35a25c36d7c8ae59b636f3cc
DIFF: https://github.com/llvm/llvm-project/commit/c6c4f54e6bf71b9e35a25c36d7c8ae59b636f3cc.diff
LOG: [AMDGPU] Add gfx11 runline to omod test. NFC
Added:
Modified:
llvm/test/CodeGen/AMDGPU/omod.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll
index efc443c5cb13..fcc3d6ec7369 100644
--- a/llvm/test/CodeGen/AMDGPU/omod.ll
+++ b/llvm/test/CodeGen/AMDGPU/omod.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefixes=VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11 %s
; IEEE bit enabled for compute kernel, so shouldn't use.
define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
@@ -38,6 +39,20 @@ define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(ptr addrspac
; VI-NEXT: v_mul_f32_e32 v2, 0.5, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f32_enable_ieee_signed_zeros:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v1, 0.5, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -84,6 +99,20 @@ define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_signed_zeros(ptr addrspac
; VI-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f64_enable_ieee_signed_zeros:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v2, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
%out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
@@ -130,6 +159,20 @@ define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_nsz(ptr addrspace(1) %out
; VI-NEXT: v_mul_f32_e32 v2, 0.5, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f32_enable_ieee_nsz:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v1, 0.5, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -176,6 +219,20 @@ define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_nsz(ptr addrspace(1) %out
; VI-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f64_enable_ieee_nsz:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v2, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
%out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
@@ -203,6 +260,15 @@ define amdgpu_ps void @v_omod_div2_f32_signed_zeros(float %a) #4 {
; VI-NEXT: v_mul_f32_e32 v0, 0.5, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f32_signed_zeros:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 0.5
store float %div2, ptr addrspace(1) undef
@@ -226,6 +292,15 @@ define amdgpu_ps void @v_omod_div2_f64_signed_zeros(double %a) #4 {
; VI-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f64_signed_zeros:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd double %a, 1.0
%div2 = fmul double %add, 0.5
store double %div2, ptr addrspace(1) undef
@@ -246,6 +321,13 @@ define amdgpu_ps void @v_omod_div2_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, 1.0 div:2
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 div:2
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 0.5
store float %div2, ptr addrspace(1) undef
@@ -266,6 +348,13 @@ define amdgpu_ps void @v_omod_div2_f64(double %a) #5 {
; VI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 div:2
; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 div:2
+; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd nsz double %a, 1.0
%div2 = fmul nsz double %add, 0.5
store double %div2, ptr addrspace(1) undef
@@ -286,6 +375,13 @@ define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:2
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul2_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:2
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 2.0
store float %div2, ptr addrspace(1) undef
@@ -306,6 +402,13 @@ define amdgpu_ps void @v_omod_mul2_f64(double %a) #5 {
; VI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:2
; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul2_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:2
+; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd nsz double %a, 1.0
%div2 = fmul nsz double %add, 2.0
store double %div2, ptr addrspace(1) undef
@@ -326,6 +429,13 @@ define amdgpu_ps void @v_omod_mul4_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul4_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 4.0
store float %div2, ptr addrspace(1) undef
@@ -346,6 +456,13 @@ define amdgpu_ps void @v_omod_mul4_f64(double %a) #5 {
; VI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:4
; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul4_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:4
+; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd nsz double %a, 1.0
%div2 = fmul nsz double %add, 4.0
store double %div2, ptr addrspace(1) undef
@@ -372,6 +489,18 @@ define amdgpu_ps void @v_omod_mul4_multi_use_f32(float %a) #0 {
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul4_multi_use_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v1, 4.0, v0
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v[0:1], v1, off
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 4.0
store float %div2, ptr addrspace(1) undef
@@ -393,6 +522,13 @@ define amdgpu_ps void @v_omod_mul4_dbg_use_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul4_dbg_use_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
call void @llvm.dbg.value(metadata float %add, i64 0, metadata !4, metadata !9), !dbg !10
%div2 = fmul float %add, 4.0
@@ -415,6 +551,13 @@ define amdgpu_ps void @v_clamp_omod_div2_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp div:2
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_clamp_omod_div2_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp div:2
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 0.5
@@ -441,6 +584,15 @@ define amdgpu_ps void @v_omod_div2_clamp_f32(float %a) #0 {
; VI-NEXT: v_mul_f32_e32 v0, 0.5, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_clamp_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%max = call float @llvm.maxnum.f32(float %add, float 0.0)
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
@@ -465,6 +617,15 @@ define amdgpu_ps void @v_omod_div2_abs_src_f32(float %a) #0 {
; VI-NEXT: v_mul_f32_e64 v0, |v0|, 0.5
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_abs_src_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 0.5
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%abs.add = call float @llvm.fabs.f32(float %add)
%div2 = fmul float %abs.add, 0.5
@@ -486,6 +647,13 @@ define amdgpu_ps void @v_omod_add_self_clamp_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, v0 clamp
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_add_self_clamp_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, v0 clamp
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, %a
%max = call float @llvm.maxnum.f32(float %add, float 0.0)
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
@@ -509,6 +677,15 @@ define amdgpu_ps void @v_omod_add_clamp_self_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e32 v0, v0, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_add_clamp_self_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_max_f32_e64 v0, v0, v0 clamp
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%max = call float @llvm.maxnum.f32(float %a, float 0.0)
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
%add = fadd float %clamp, %clamp
@@ -532,6 +709,15 @@ define amdgpu_ps void @v_omod_add_abs_self_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, |v0|, |v0|
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_add_abs_self_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f32_e64 v0, |v0|, |v0|
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%x = fadd float %a, 1.0
%abs.x = call float @llvm.fabs.f32(float %x)
%add = fadd float %abs.x, %abs.x
@@ -555,6 +741,15 @@ define amdgpu_ps void @v_omod_add_abs_x_x_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, |v0|, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_add_abs_x_x_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f32_e64 v0, |v0|, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%x = fadd float %a, 1.0
%abs.x = call float @llvm.fabs.f32(float %x)
%add = fadd float %abs.x, %x
@@ -578,6 +773,15 @@ define amdgpu_ps void @v_omod_add_x_abs_x_f32(float %a) #0 {
; VI-NEXT: v_add_f32_e64 v0, v0, |v0|
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_add_x_abs_x_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f32_e64 v0, v0, |v0|
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%x = fadd float %a, 1.0
%abs.x = call float @llvm.fabs.f32(float %x)
%add = fadd float %x, %abs.x
@@ -602,6 +806,15 @@ define amdgpu_ps void @v_omod_div2_omod_div2_f32(float %a) #0 {
; VI-NEXT: v_mul_f32_e32 v0, 0.5, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_omod_div2_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 div:2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2.0 = fmul float %add, 0.5
%div2.1 = fmul float %div2.0, 0.5
@@ -626,6 +839,15 @@ define amdgpu_ps void @v_omod_div2_f32_denormals(float %a) #2 {
; VI-NEXT: v_mul_f32_e32 v0, 0.5, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f32_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%div2 = fmul float %add, 0.5
store float %div2, ptr addrspace(1) undef
@@ -649,6 +871,15 @@ define amdgpu_ps void @v_omod_div2_f64_denormals(double %a) #6 {
; VI-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f64_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd double %a, 1.0
%div2 = fmul double %add, 0.5
store double %div2, ptr addrspace(1) undef
@@ -672,6 +903,15 @@ define amdgpu_ps void @v_omod_mul2_f32_denormals(float %a) #2 {
; VI-NEXT: v_add_f32_e32 v0, v0, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul2_f32_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
%mul2 = fadd float %add, %add
store float %mul2, ptr addrspace(1) undef
@@ -695,6 +935,15 @@ define amdgpu_ps void @v_omod_mul2_f64_denormals(double %a) #2 {
; VI-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul2_f64_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd double %a, 1.0
%mul2 = fadd double %add, %add
store double %mul2, ptr addrspace(1) undef
@@ -720,6 +969,15 @@ define amdgpu_ps void @v_omod_div2_f16_denormals(half %a) #0 {
; VI-NEXT: v_mul_f16_e32 v0, 0.5, v0
; VI-NEXT: flat_store_short v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f16_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v0, 0.5, v0
+; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd half %a, 1.0
%div2 = fmul half %add, 0.5
store half %div2, ptr addrspace(1) undef
@@ -745,6 +1003,15 @@ define amdgpu_ps void @v_omod_mul2_f16_denormals(half %a) #0 {
; VI-NEXT: v_add_f16_e32 v0, v0, v0
; VI-NEXT: flat_store_short v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mul2_f16_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_f16_e32 v0, v0, v0
+; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd half %a, 1.0
%mul2 = fadd half %add, %add
store half %mul2, ptr addrspace(1) undef
@@ -768,6 +1035,13 @@ define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) #3 {
; VI-NEXT: v_add_f16_e64 v0, v0, 1.0 div:2
; VI-NEXT: flat_store_short v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_div2_f16_no_denormals:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f16_e64 v0, v0, 1.0 div:2
+; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%add = fadd half %a, 1.0
%div2 = fmul half %add, 0.5
store half %div2, ptr addrspace(1) undef
@@ -790,6 +1064,16 @@ define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
; VI-NEXT: v_mul_f32_e32 v0, v1, v0
; VI-NEXT: flat_store_dword v[0:1], v0
; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_omod_mac_to_mad:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mul_f32_e32 v1, v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_f32_e64 v1, v1, v0 mul:2
+; GFX11-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%mul = fmul float %a, %a
%add = fadd float %mul, %b
%mad = fmul float %add, 2.0
More information about the llvm-commits mailing list