[llvm] [AMDGPU][GISel] Add RegBankLegalize support for G_STRICT_FSUB (PR #173929)
Chinmay Deshpande via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 29 15:02:21 PST 2025
https://github.com/chinmaydd created https://github.com/llvm/llvm-project/pull/173929
None
>From 78e764d3df0d4c1a00cf4efcca22bf170d9c025e Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Mon, 29 Dec 2025 17:54:53 -0500
Subject: [PATCH] [AMDGPU][GISel] Add RegBankLegalize support for G_STRICT_FSUB
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 2 +-
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll | 59 +++++++++++++------
llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll | 8 +--
3 files changed, 46 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 94abccb808136..ab17d9e863d2b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -964,7 +964,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
hasSALUFloat)
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
- addRulesForGOpcs({G_FSUB}, Standard)
+ addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index 45cc77486b509..0787e0d0551fc 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG-FAKE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; FIXME: promotion not handled without f16 insts
@@ -522,6 +522,7 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX9-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
@@ -555,10 +556,12 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
-; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
@@ -597,7 +600,9 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX11-GISEL-FAKE16-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX11-GISEL-FAKE16-NEXT: v_sub_f16_e32 v2, v4, v5
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
@@ -630,8 +635,11 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-GISEL-NEXT: v_sub_f16_e32 v2, v4, v5
; GFX12-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX12-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10PLUS-SDAG: ; %bb.0:
@@ -913,8 +921,8 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
;
; GFX9-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-GISEL-NEXT: v_pk_add_f16 v0, s2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, s3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX8-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
@@ -931,15 +939,22 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
;
; GFX8-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_xor_b32 s0, s3, 0x80008000
-; GFX8-GISEL-NEXT: s_lshr_b32 s1, s2, 16
-; GFX8-GISEL-NEXT: s_lshr_b32 s3, s0, 16
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-GISEL-NEXT: v_add_f16_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_lshr_b32 s1, s2, 16
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-GISEL-NEXT: v_add_f16_e32 v0, s1, v0
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s1, v0
+; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-GISEL-NEXT: s_lshl_b32 s1, s1, 16
+; GFX8-GISEL-NEXT: s_or_b32 s0, s0, s1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
@@ -994,7 +1009,15 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
;
; GFX12-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: v_pk_add_f16 v0, s2, s3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX12-GISEL-NEXT: s_lshr_b32 s0, s3, 16
+; GFX12-GISEL-NEXT: s_xor_b32 s1, s3, 0x8000
+; GFX12-GISEL-NEXT: s_xor_b32 s0, s0, 0x8000
+; GFX12-GISEL-NEXT: s_lshr_b32 s3, s2, 16
+; GFX12-GISEL-NEXT: s_add_f16 s1, s2, s1
+; GFX12-GISEL-NEXT: s_add_f16 s0, s3, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_pack_ll_b32_b16 s0, s1, s0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: ; return to shader part epilog
; GFX10PLUS-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX10PLUS-SDAG: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
index 3e6db4d4ac6c8..3ab1de1bed70c 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
More information about the llvm-commits mailing list