[llvm] [AMDGPU][GlobalISel] Add register bank legalization rules for amdgcn atomic fmin/fmax num (PR #184564)
Syadus Sefat via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 11:54:28 PST 2026
https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/184564
>From e847866c84b42bd09e9f6eae1e66b55e6c056cda Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Wed, 4 Mar 2026 01:49:18 -0600
Subject: [PATCH] [AMDGPU][GlobalIsel] Add register bank legalization rules for
amdgcn_global_atomic_flat_fminmax
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 7 +
.../AMDGPU/fp-min-max-num-flat-atomics.ll | 217 +++++++++++++++---
.../AMDGPU/fp-min-max-num-global-atomics.ll | 82 +++++--
3 files changed, 262 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index a4945d610ce58..d0ee48d9235b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1489,6 +1489,13 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
.Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
+ addRulesForIOpcs(
+ {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num})
+ .Any({{DivS32}, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}}});
+
+ addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num})
+ .Any({{DivS32}, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}}});
+
addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
.Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
index 874aa543a214a..de84f686ac148 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
@@ -1,36 +1,56 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
-; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --version 6
+; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX,GFX12,GFX12-SDAG
+; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX,GFX12,GFX12-GISEL
+; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefixes=GFX,GFX1250,GFX1250-SDAG
+; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefixes=GFX,GFX1250,GFX1250-GISEL
declare float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
declare float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
define amdgpu_cs void @flat_atomic_fmin_num_f32_noret(ptr %ptr, float %data) {
-; GFX12-LABEL: flat_atomic_fmin_num_f32_noret:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2
-; GFX12-NEXT: s_endpgm
+; GFX-LABEL: flat_atomic_fmin_num_f32_noret:
+; GFX: ; %bb.0:
+; GFX: flat_atomic_min_num_f32 v[0:1], v2
+; GFX: s_endpgm
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
ret void
}
define amdgpu_cs void @flat_atomic_fmax_num_f32_noret(ptr %ptr, float %data) {
-; GFX12-LABEL: flat_atomic_fmax_num_f32_noret:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2
-; GFX12-NEXT: s_endpgm
+; GFX-LABEL: flat_atomic_fmax_num_f32_noret:
+; GFX: ; %bb.0:
+; GFX: flat_atomic_max_num_f32 v[0:1], v2
+; GFX: s_endpgm
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
ret void
}
define amdgpu_cs float @flat_atomic_fmin_num_f32_rtn(ptr %ptr, float %data, ptr %out) {
; GFX12-LABEL: flat_atomic_fmin_num_f32_rtn:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: flat_store_b32 v[3:4], v0
-; GFX12-NEXT: s_wait_dscnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12: ; %bb.0:
+; GFX12: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
+; GFX12: s_wait_loadcnt_dscnt 0x0
+; GFX12: flat_store_b32 v[3:4], v0
+; GFX12: s_wait_dscnt 0x0
+; GFX12: ; return to shader part epilog
+;
+; GFX1250-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
+; GFX1250-SDAG: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
+; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG: flat_store_b32 v[4:5], v0
+; GFX1250-SDAG: s_wait_dscnt 0x0
+; GFX1250-SDAG: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
+; GFX1250-GISEL: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
+; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL: flat_store_b32 v[6:7], v0
+; GFX1250-GISEL: s_wait_dscnt 0x0
+; GFX1250-GISEL: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
store float %ret, ptr %out
ret float %ret
@@ -38,16 +58,161 @@ define amdgpu_cs float @flat_atomic_fmin_num_f32_rtn(ptr %ptr, float %data, ptr
define amdgpu_cs float @flat_atomic_fmax_num_f32_rtn(ptr %ptr, float %data, ptr %out) {
; GFX12-LABEL: flat_atomic_fmax_num_f32_rtn:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: flat_store_b32 v[3:4], v0
-; GFX12-NEXT: s_wait_dscnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12: ; %bb.0:
+; GFX12: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
+; GFX12: s_wait_loadcnt_dscnt 0x0
+; GFX12: flat_store_b32 v[3:4], v0
+; GFX12: s_wait_dscnt 0x0
+; GFX12: ; return to shader part epilog
+;
+; GFX1250-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
+; GFX1250-SDAG: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
+; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG: flat_store_b32 v[4:5], v0
+; GFX1250-SDAG: s_wait_dscnt 0x0
+; GFX1250-SDAG: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
+; GFX1250-GISEL: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
+; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL: flat_store_b32 v[6:7], v0
+; GFX1250-GISEL: s_wait_dscnt 0x0
+; GFX1250-GISEL: ; return to shader part epilog
+ %ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
+ store float %ret, ptr %out
+ ret float %ret
+}
+
+define amdgpu_ps void @flat_atomic_fmin_num_f32_noret_saddr(ptr inreg %ptr, float %data) {
+; GFX12-SDAG-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
+; GFX12-SDAG: flat_atomic_min_num_f32 v[1:2], v0
+; GFX12-SDAG: s_endpgm
+;
+; GFX12-GISEL-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX12-GISEL: flat_atomic_min_num_f32 v[1:2], v0
+; GFX12-GISEL: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
+; GFX1250: ; %bb.0:
+; GFX1250: v_mov_b32_e32 v1, 0
+; GFX1250: flat_atomic_min_num_f32 v1, v0, s[0:1]
+; GFX1250: s_endpgm
+ %ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
+ ret void
+}
+
+define amdgpu_ps void @flat_atomic_fmax_num_f32_noret_saddr(ptr inreg %ptr, float %data) {
+; GFX12-SDAG-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
+; GFX12-SDAG: flat_atomic_max_num_f32 v[1:2], v0
+; GFX12-SDAG: s_endpgm
+;
+; GFX12-GISEL-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX12-GISEL: flat_atomic_max_num_f32 v[1:2], v0
+; GFX12-GISEL: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
+; GFX1250: ; %bb.0:
+; GFX1250: v_mov_b32_e32 v1, 0
+; GFX1250: flat_atomic_max_num_f32 v1, v0, s[0:1]
+; GFX1250: s_endpgm
+ %ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
+ ret void
+}
+
+define amdgpu_ps float @flat_atomic_fmin_num_f32_rtn_saddr(ptr inreg %ptr, float %data, ptr %out) {
+; GFX12-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG: v_dual_mov_b32 v3, s0 :: v_dual_mov_b32 v4, s1
+; GFX12-SDAG: flat_atomic_min_num_f32 v0, v[3:4], v0 th:TH_ATOMIC_RETURN
+; GFX12-SDAG: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG: flat_store_b32 v[1:2], v0
+; GFX12-SDAG: s_wait_dscnt 0x0
+; GFX12-SDAG: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
+; GFX12-GISEL: flat_atomic_min_num_f32 v0, v[3:4], v0 th:TH_ATOMIC_RETURN
+; GFX12-GISEL: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL: flat_store_b32 v[1:2], v0
+; GFX12-GISEL: s_wait_dscnt 0x0
+; GFX12-GISEL: ; return to shader part epilog
+;
+; GFX1250-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG: v_mov_b32_e32 v1, 0
+; GFX1250-SDAG: flat_atomic_min_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG: flat_store_b32 v[2:3], v0
+; GFX1250-SDAG: s_wait_dscnt 0x0
+; GFX1250-SDAG: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL: flat_atomic_min_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL: flat_store_b32 v[4:5], v0
+; GFX1250-GISEL: s_wait_dscnt 0x0
+; GFX1250-GISEL: ; return to shader part epilog
+ %ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
+ store float %ret, ptr %out
+ ret float %ret
+}
+
+define amdgpu_ps float @flat_atomic_fmax_num_f32_rtn_saddr(ptr inreg %ptr, float %data, ptr %out) {
+; GFX12-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG: v_dual_mov_b32 v3, s0 :: v_dual_mov_b32 v4, s1
+; GFX12-SDAG: flat_atomic_max_num_f32 v0, v[3:4], v0 th:TH_ATOMIC_RETURN
+; GFX12-SDAG: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG: flat_store_b32 v[1:2], v0
+; GFX12-SDAG: s_wait_dscnt 0x0
+; GFX12-SDAG: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
+; GFX12-GISEL: flat_atomic_max_num_f32 v0, v[3:4], v0 th:TH_ATOMIC_RETURN
+; GFX12-GISEL: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL: flat_store_b32 v[1:2], v0
+; GFX12-GISEL: s_wait_dscnt 0x0
+; GFX12-GISEL: ; return to shader part epilog
+;
+; GFX1250-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG: v_mov_b32_e32 v1, 0
+; GFX1250-SDAG: flat_atomic_max_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG: flat_store_b32 v[2:3], v0
+; GFX1250-SDAG: s_wait_dscnt 0x0
+; GFX1250-SDAG: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL: flat_atomic_max_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL: flat_store_b32 v[4:5], v0
+; GFX1250-GISEL: s_wait_dscnt 0x0
+; GFX1250-GISEL: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
store float %ret, ptr %out
ret float %ret
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
index d5250581a6ca4..1a859815e5e92 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
@@ -1,35 +1,35 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --version 6
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
-; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
+; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
declare float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
declare float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
define amdgpu_cs void @global_atomic_fmin_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
; GFX12-LABEL: global_atomic_fmin_num_f32_noret:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: global_atomic_min_num_f32 v[0:1], v2, off
+; GFX12: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
define amdgpu_cs void @global_atomic_fmax_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
; GFX12-LABEL: global_atomic_fmax_num_f32_noret:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: global_atomic_max_num_f32 v[0:1], v2, off
+; GFX12: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
define amdgpu_cs void @global_atomic_fmax_num_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) {
; GFX12-LABEL: global_atomic_fmax_num_f32_rtn:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: global_store_b32 v[3:4], v0, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
+; GFX12: s_wait_loadcnt 0x0
+; GFX12: global_store_b32 v[3:4], v0, off
+; GFX12: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
store float %ret, ptr addrspace(1) %out
ret void
@@ -37,15 +37,61 @@ define amdgpu_cs void @global_atomic_fmax_num_f32_rtn(ptr addrspace(1) %ptr, flo
define amdgpu_cs void @global_atomic_fmin_num_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) {
; GFX12-LABEL: global_atomic_fmin_num_f32_rtn:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: global_store_b32 v[3:4], v0, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
+; GFX12: s_wait_loadcnt 0x0
+; GFX12: global_store_b32 v[3:4], v0, off
+; GFX12: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
store float %ret, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_ps void @global_atomic_fmin_num_f32_noret_saddr(ptr addrspace(1) inreg %ptr, float %data) {
+; GFX12-LABEL: global_atomic_fmin_num_f32_noret_saddr:
+; GFX12: ; %bb.0:
+; GFX12: v_mov_b32_e32 v1, 0
+; GFX12: global_atomic_min_num_f32 v1, v0, s[0:1]
+; GFX12: s_endpgm
+ %ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
+ ret void
+}
+
+define amdgpu_ps void @global_atomic_fmax_num_f32_noret_saddr(ptr addrspace(1) inreg %ptr, float %data) {
+; GFX12-LABEL: global_atomic_fmax_num_f32_noret_saddr:
+; GFX12: ; %bb.0:
+; GFX12: v_mov_b32_e32 v1, 0
+; GFX12: global_atomic_max_num_f32 v1, v0, s[0:1]
+; GFX12: s_endpgm
+ %ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
+ ret void
+}
+
+define amdgpu_ps void @global_atomic_fmin_num_f32_rtn_saddr(ptr addrspace(1) inreg %ptr, float %data, ptr addrspace(1) %out) {
+; GFX12-LABEL: global_atomic_fmin_num_f32_rtn_saddr:
+; GFX12: ; %bb.0:
+; GFX12: v_mov_b32_e32 v3, 0
+; GFX12: global_atomic_min_num_f32 v0, v3, v0, s[0:1] th:TH_ATOMIC_RETURN
+; GFX12: s_wait_loadcnt 0x0
+; GFX12: global_store_b32 v[1:2], v0, off
+; GFX12: s_endpgm
+ %ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
+ store float %ret, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @global_atomic_fmax_num_f32_rtn_saddr(ptr addrspace(1) inreg %ptr, float %data, ptr addrspace(1) %out) {
+; GFX12-LABEL: global_atomic_fmax_num_f32_rtn_saddr:
+; GFX12: ; %bb.0:
+; GFX12: v_mov_b32_e32 v3, 0
+; GFX12: global_atomic_max_num_f32 v0, v3, v0, s[0:1] th:TH_ATOMIC_RETURN
+; GFX12: s_wait_loadcnt 0x0
+; GFX12: global_store_b32 v[1:2], v0, off
+; GFX12: s_endpgm
+ %ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
+ store float %ret, ptr addrspace(1) %out
+ ret void
+}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}
More information about the llvm-commits mailing list