[llvm] [GlobalISel] Remove NoNaNsFPMath uses (PR #163484)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 14 22:41:47 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/163484
>From a366748b9f95d5819e166430f55ee34e8133e9e5 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 15 Oct 2025 13:07:49 +0800
Subject: [PATCH] [GlobalISel] Remove NoNaNsFPMath uses
---
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 3 +-
.../GlobalISel/clamp-minmax-const-combine.ll | 9 +-
.../GlobalISel/fmed3-min-max-const-combine.ll | 11 +-
llvm/test/CodeGen/AMDGPU/fmed3.ll | 275 +++++++++---------
llvm/test/CodeGen/AMDGPU/minmax.ll | 24 +-
5 files changed, 159 insertions(+), 163 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6ad7213..ca82857319abc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 26b9d996fc284..8705647e36fe1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -206,7 +206,7 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; global nnan function attribute always forces clamp combine
-define float @test_min_max_global_nnan(float %a) #3 {
+define float @test_min_max_global_nnan(float %a) {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -223,11 +223,11 @@ define float @test_min_max_global_nnan(float %a) #3 {
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
- %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #3 {
+define float @test_max_min_global_nnan(float %a) {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -244,7 +244,7 @@ define float @test_max_min_global_nnan(float %a) #3 {
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 1.0)
- %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
ret float %fmed
}
@@ -414,5 +414,4 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
-attributes #3 = {"no-nans-fp-math"="true"}
attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index d2c93e75cbed6..696a87b9d0b4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -232,7 +232,7 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; global nnan function attribute always forces fmed3 combine
-define float @test_min_max_global_nnan(float %a) #2 {
+define float @test_min_max_global_nnan(float %a) {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -254,12 +254,12 @@ define float @test_min_max_global_nnan(float %a) #2 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
%fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #2 {
+define float @test_max_min_global_nnan(float %a) {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,8 +281,8 @@ define float @test_max_min_global_nnan(float %a) #2 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
- %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+ %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
ret float %fmed
}
@@ -560,4 +560,3 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 9e152253bb6ca..3145a272ae48f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -10,7 +10,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -231,7 +231,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -342,7 +342,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -453,7 +453,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -569,7 +569,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -740,7 +740,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -955,14 +955,14 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
%outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep0
- %max = call float @llvm.maxnum.f32(float %a, float 2.0)
- %med = call float @llvm.minnum.f32(float %max, float 4.0)
+ %max = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call nnan float @llvm.minnum.f32(float %max, float 4.0)
store float %med, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -1297,10 +1297,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1487,10 +1487,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%b.fneg = fsub float -0.0, %b
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b.fneg)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b.fneg)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1677,10 +1677,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%c.fneg = fsub float -0.0, %c
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1872,14 +1872,14 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %b.fabs = call float @llvm.fabs.f32(float %b)
- %c.fabs = call float @llvm.fabs.f32(float %c)
+ %b.fabs = call nnan float @llvm.fabs.f32(float %b)
+ %c.fabs = call nnan float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
@@ -2082,16 +2082,16 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
%c.fabs = call float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
- %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2266,7 +2266,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2418,7 +2418,7 @@ define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2570,7 +2570,7 @@ define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2878,10 +2878,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3030,10 +3030,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3220,10 +3220,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3372,10 +3372,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3524,10 +3524,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3676,10 +3676,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3828,10 +3828,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3980,10 +3980,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -4132,10 +4132,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -4284,10 +4284,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -4436,10 +4436,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -4588,10 +4588,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -4740,10 +4740,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -4892,10 +4892,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -5044,10 +5044,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -5196,10 +5196,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -5348,10 +5348,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -5503,10 +5503,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -5515,7 +5515,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
; Negative patterns
; ---------------------------------------------------------------------
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -5717,7 +5717,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -5944,7 +5944,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6146,7 +6146,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6352,7 +6352,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6527,7 +6527,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6702,7 +6702,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6877,7 +6877,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -7270,10 +7270,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -7428,13 +7428,13 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %max = call float @llvm.maxnum.f32(float %a, float %b)
- %minmax = call float @llvm.minnum.f32(float %max, float %c)
+ %max = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %minmax = call nnan float @llvm.minnum.f32(float %max, float %c)
store float %minmax, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -7597,7 +7597,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -7865,7 +7865,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: two_non_inline_constant:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -7998,7 +7998,7 @@ define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr ad
}
; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants.
-define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: one_non_inline_constant:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -8137,7 +8137,7 @@ define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: two_non_inline_constant_multi_use:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -8343,7 +8343,7 @@ define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %o
ret void
}
-define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 {
+define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) {
; SI-LABEL: v_test_fmed3_r_i_i_f32_minimumnum_maximumnum:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8384,7 +8384,7 @@ define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 {
ret float %med
}
-define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %a) #1 {
+define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %a) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8452,7 +8452,7 @@ define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %
ret <2 x float> %med
}
-define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(float %a) #1 {
+define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(float %a) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8525,7 +8525,7 @@ define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(
ret { float, float } %ins.1
}
-define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) {
; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8567,7 +8567,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a)
ret float %med
}
-define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) {
; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8609,7 +8609,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a)
ret float %med
}
-define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) {
; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8651,7 +8651,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) #1 {
ret float %med
}
-define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) {
; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8693,7 +8693,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) #1 {
ret float %med
}
-define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 {
+define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8772,7 +8772,7 @@ define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 {
ret half %med
}
-define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) #1 {
+define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8848,7 +8848,7 @@ define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a)
ret <2 x half> %med
}
-define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) #1 {
+define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) {
; SI-LABEL: v_test_fmed3_r_i_i_f64_minimumnum_maximumnum:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8905,5 +8905,4 @@ declare half @llvm.minnum.f16(half, half) #0
declare half @llvm.maxnum.f16(half, half) #0
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 56f9c5dfe5068..d578d2e9720f0 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -612,10 +612,10 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z)
; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4
; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
- %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
- %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
- %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %x, float %y)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %x, float %y)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %z)
+ %tmp3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %tmp3, ptr addrspace(1) %arg
ret void
}
@@ -646,10 +646,10 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4
; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
- %tmp0 = call float @llvm.minimumnum.f32(float %x, float %y)
- %tmp1 = call float @llvm.maximumnum.f32(float %x, float %y)
- %tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z)
- %tmp3 = call float @llvm.maximumnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+ %tmp1 = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+ %tmp2 = call nnan float @llvm.minimumnum.f32(float %tmp1, float %z)
+ %tmp3 = call nnan float @llvm.maximumnum.f32(float %tmp0, float %tmp2)
store float %tmp3, ptr addrspace(1) %arg
ret void
}
@@ -1280,10 +1280,10 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; GISEL-GFX1250-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
; GISEL-GFX1250-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
- %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
- %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
- %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
- %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
+ %tmp0 = call nnan half @llvm.minnum.f16(half %x, half %y)
+ %tmp1 = call nnan half @llvm.maxnum.f16(half %x, half %y)
+ %tmp2 = call nnan half @llvm.minnum.f16(half %tmp1, half %z)
+ %tmp3 = call nnan half @llvm.maxnum.f16(half %tmp0, half %tmp2)
store half %tmp3, ptr addrspace(1) %arg
ret void
}
More information about the llvm-commits mailing list