[llvm] d15c454 - [FPEnv][AMDGPU] Correct strictfp tests.

Mon Feb 5 06:30:35 PST 2024

Author: Kevin P. Neal
Date: 2024-02-05T09:29:31-05:00
New Revision: d15c454bedc05775b5080e1d2130b0554d5e5a81

URL: https://github.com/llvm/llvm-project/commit/d15c454bedc05775b5080e1d2130b0554d5e5a81
DIFF: https://github.com/llvm/llvm-project/commit/d15c454bedc05775b5080e1d2130b0554d5e5a81.diff

LOG: [FPEnv][AMDGPU] Correct strictfp tests.

Correct AMDGPU strictfp tests to follow the rules documented in the
LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

These tests needed the strictfp attribute added to function calls and
some declarations.

Some of the tests now pass with D146845; others get farther along but
still fail with D146845. The tests revealed that further work is required,
mostly in AMDGPU atomics, to get the tests passing.

Since I was here anyway, I removed the strictfp attribute from some
constrained intrinsic declarations. They have this attribute by default.

Test changes verified with D146845.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
    llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
    llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
    llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
    llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
    llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
index 9896373b08d1f..85286841cbcac 100644

--- a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
   ret <2 x half> %mul
 }
 
-define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
   ret float %mul
 }
 
-define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
   ret float %mul
 }
 
-define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1316,3 +1316,6 @@ declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
+attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
+attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
+attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }

diff  --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index df254a059d9f3..6eec8d5356ca8 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB3_2
 ; GFX1132-DPP-NEXT:  .LBB3_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
   ret void
 }
@@ -4174,7 +4174,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    global_atomic_add_f32 v4, v0, s[0:1]
 ; GFX1132-DPP-NEXT:  .LBB6_2:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
   ret void
 }
@@ -5403,7 +5403,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB8_2
 ; GFX1132-DPP-NEXT:  .LBB8_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue monotonic, align 4
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 5f4d0302ab38e..c927a0e1ef06c 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -2380,7 +2380,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB3_2
 ; GFX1132-DPP-NEXT:  .LBB3_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
   ret void
 }
@@ -4382,7 +4382,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB6_2
 ; GFX1132-DPP-NEXT:  .LBB6_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
   ret void
 }
@@ -5611,7 +5611,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB8_2
 ; GFX1132-DPP-NEXT:  .LBB8_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue monotonic, align 4
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
index 6c920e84bcdb1..aca7d3c720ceb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
@@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp {
 ; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  %fpmode = call i32 @llvm.get.fpmode.i32()
+  %fpmode = call i32 @llvm.get.fpmode.i32() strictfp
   ret i32 %fpmode
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
index 965040d0d879c..0f80327638a9c 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
@@ -374,24 +374,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
 ; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1011-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
 ; GFX1011-NEXT:    s_setpc_b64 s[30:31]
-  %abs.arg = call float @llvm.fabs.f32(float %arg)
+  %abs.arg = call float @llvm.fabs.f32(float %arg) #0
   %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret void
 }
 
-declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)
 
-declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1
+declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
 
-declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1
+declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
 
-declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fabs.f32(float)
 
 attributes #0 = { strictfp }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }

diff  --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
index 468a8463a06d6..3bf7fec81c041 100644
--- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
@@ -1,17 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
 
-declare void @f16_user(half)
-declare half @f16_result()
+declare void @f16_user(half) #0
+declare half @f16_result() #0
 
-declare void @v2f16_user(<2 x half>)
-declare <2 x half> @v2f16_result()
+declare void @v2f16_user(<2 x half>) #0
+declare <2 x half> @v2f16_result() #0
 
-declare void @v4f16_user(<4 x half>)
-declare <4 x half> @v4f16_result()
+declare void @v4f16_user(<4 x half>) #0
+declare <4 x half> @v4f16_result() #0
 
-declare void @v8f16_user(<8 x half>)
-declare <8 x half> @v8f16_result()
+declare void @v8f16_user(<8 x half>) #0
+declare <8 x half> @v8f16_result() #0
 
 define void @f16_arg(half %arg, ptr %ptr) #0 {
 ; GFX7-LABEL: f16_arg: