[llvm] [AMDGPU][GISel] Add RegBankLegalize support for G_STRICT_{FADD|FSUB|FMUL} (PR #169406)
Chinmay Deshpande via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 24 12:59:44 PST 2025
https://github.com/chinmaydd updated https://github.com/llvm/llvm-project/pull/169406
>From 0a29f910d59536a6e6d6e19307fd8f2fd89fc9f9 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Mon, 24 Nov 2025 15:49:50 -0500
Subject: [PATCH 1/2] [AMDGPU][GISel] Add RegBankLegalize support for
G_STRICT_{FADD|FSUB|FMUL}
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 22 ++++++
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 5 +-
llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll | 49 +++++++-----
llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll | 25 +++++--
llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll | 74 +++++++++++++++++--
llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll | 27 ++++---
llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll | 6 +-
llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll | 17 +----
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll | 42 +++++++----
llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll | 8 +-
llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll | 17 +----
11 files changed, 201 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 6ec51e1be8aca..1cf8ca50f21e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -120,6 +120,10 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
case UniV2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
+ case UniV2S32:
+ return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
+ case UniV2S64:
+ return MRI.getType(Reg) == LLT::fixed_vector(2, 64) && MUI.isUniform(Reg);
case UniB32:
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
case UniB64:
@@ -160,6 +164,10 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
case DivV2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
+ case DivV2S32:
+ return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
+ case DivV2S64:
+ return MRI.getType(Reg) == LLT::fixed_vector(2, 64) && MUI.isDivergent(Reg);
case DivB32:
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
case DivB64:
@@ -991,6 +999,20 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
.Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
+ addRulesForGOpcs({G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL}, Standard)
+ .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
+ .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
+ .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
+ .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}})
+ .Any({{UniV2S64}, {{UniInVgprV2S64}, {VgprV2S64, VgprV2S64}}})
+ .Any({{DivV2S64}, {{VgprV2S64}, {VgprV2S64, VgprV2S64}}});
+
using namespace Intrinsic;
addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 7e4ce7b43dc3b..c4279b4416fb3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -95,9 +95,10 @@ enum UniformityLLTOpPredicateID {
UniV2S16,
UniV2S32,
-
+ UniV2S64,
DivV2S16,
DivV2S32,
+ DivV2S64,
// B types
B32,
@@ -172,6 +173,7 @@ enum RegBankLLTMappingApplyID {
VgprPtr128,
VgprV2S16,
VgprV2S32,
+ VgprV2S64,
VgprB32,
VgprB64,
VgprB96,
@@ -187,6 +189,7 @@ enum RegBankLLTMappingApplyID {
UniInVgprS64,
UniInVgprV2S16,
UniInVgprV2S32,
+ UniInVgprV2S64,
UniInVgprV4S32,
UniInVgprB32,
UniInVgprB64,
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
index a094631267e64..cc6f05775d816 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,SDAG %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -new-reg-bank-select -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GISEL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
; FIXME: promotion not handled without f16 insts
@@ -179,12 +180,19 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: v_add_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX10-NEXT: v_add_f16_e32 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; SDAG-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_pk_add_f16 v0, v0, v2
+; SDAG-NEXT: v_add_f16_e32 v1, v1, v3
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_pk_add_f16 v0, v0, v2
+; GISEL-NEXT: v_pk_add_f16 v1, v1, v3
+; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; GFX11-TRUE16: ; %bb.0:
@@ -228,16 +236,23 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_add_f16_e32 v0, v0, v2
-; GFX10-NEXT: v_add_f16_e32 v1, v1, v3
-; GFX10-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; SDAG-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; SDAG-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; SDAG-NEXT: v_add_f16_e32 v0, v0, v2
+; SDAG-NEXT: v_add_f16_e32 v1, v1, v3
+; SDAG-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; SDAG-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_pk_add_f16 v0, v0, v2
+; GISEL-NEXT: v_pk_add_f16 v1, v1, v3
+; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GFX11-TRUE16: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
index 2aecf5fd8753c..da34680e666c5 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,SDAG %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -new-reg-bank-select -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GISEL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
@@ -206,11 +207,23 @@ define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
; GCN-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GCN-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v1, |v0|
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; SDAG-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sub_f32_e64 v0, v1, |v0|
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_add_f32_e64 v0, -|v0|, v1
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_f32_e64 v0, v1, |v0|
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x) #0
%neg.fabs.x = fneg float %fabs.x
%val = call float @llvm.experimental.constrained.fadd.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
index faa0131c88c2d..c13745233e2e1 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,SDAG %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -new-reg-bank-select -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GISEL %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
@@ -15,6 +16,12 @@ define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_f64_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret double %val
}
@@ -31,6 +38,12 @@ define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_f64_fpexcept_ignore:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret double %val
}
@@ -47,6 +60,12 @@ define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
ret double %val
}
@@ -65,6 +84,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x double> %val
}
@@ -83,6 +109,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret <2 x double> %val
}
@@ -101,6 +134,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
ret <2 x double> %val
}
@@ -121,6 +161,14 @@ define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x double> %val
}
@@ -133,10 +181,24 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
; GCN-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
; GCN-NEXT: ; return to shader part epilog
;
-; GFX10-LABEL: s_constained_fadd_f64_fpexcept_strict:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
-; GFX10-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: s_constained_fadd_f64_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
+; GFX11-NEXT: ; return to shader part epilog
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
%cast = bitcast double %val to <2 x float>
ret <2 x float> %cast
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
index eed5f016aa787..b64d0ba661813 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-FAKE16 %s
; FIXME: promotion not handled without f16 insts
@@ -454,14 +454,19 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
;
; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
-; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s1
+; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s0, v0
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s2
+; GFX8-GISEL-NEXT: s_lshl_b32 s0, s0, 16
+; GFX8-GISEL-NEXT: s_or_b32 s0, s1, s0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
index 8df2834928395..36c57888d08ac 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
index 8c98a662c59cc..e50b84a805505 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
define double @v_constained_fmul_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fmul_f64_fpexcept_strict:
@@ -129,17 +129,6 @@ define <3 x double> @v_constained_fmul_v3f64_fpexcept_strict(<3 x double> %x, <3
}
define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
-; GCN-LABEL: s_constained_fmul_f64_fpexcept_strict:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: v_mov_b32_e32 v1, s5
-; GCN-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
-; GCN-NEXT: ; return to shader part epilog
-;
-; GFX10-LABEL: s_constained_fmul_f64_fpexcept_strict:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
-; GFX10-NEXT: ; return to shader part epilog
%val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
%cast = bitcast double %val to <2 x float>
ret <2 x float> %cast
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index 6daea572f58c6..89a3d4a099890 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-SDAG-FAKE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; FIXME: promotion not handled without f16 insts
@@ -432,6 +432,7 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX9-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
@@ -465,10 +466,12 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
-; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
@@ -507,7 +510,9 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX11-GISEL-FAKE16-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX11-GISEL-FAKE16-NEXT: v_sub_f16_e32 v2, v4, v5
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10PLUS-SDAG: ; %bb.0:
@@ -742,8 +747,8 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
;
; GFX9-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-GISEL-NEXT: v_pk_add_f16 v0, s2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, s3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX8-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
@@ -760,15 +765,22 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
;
; GFX8-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_xor_b32 s0, s3, 0x80008000
-; GFX8-GISEL-NEXT: s_lshr_b32 s1, s2, 16
-; GFX8-GISEL-NEXT: s_lshr_b32 s3, s0, 16
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-GISEL-NEXT: v_add_f16_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_lshr_b32 s1, s2, 16
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-GISEL-NEXT: v_add_f16_e32 v0, s1, v0
+; GFX8-GISEL-NEXT: v_readfirstlane_b32 s1, v0
+; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-GISEL-NEXT: s_lshl_b32 s1, s1, 16
+; GFX8-GISEL-NEXT: s_or_b32 s0, s0, s1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
index 23dbe21379f7f..0d0945e738073 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel= -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
index e7d136c377079..d3f2510acaaf8 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_strict:
@@ -129,17 +129,6 @@ define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3
}
define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
-; GCN-LABEL: s_constained_fsub_f64_fpexcept_strict:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: v_mov_b32_e32 v1, s5
-; GCN-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1]
-; GCN-NEXT: ; return to shader part epilog
-;
-; GFX10-LABEL: s_constained_fsub_f64_fpexcept_strict:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_add_f64 v[0:1], s[2:3], -s[4:5]
-; GFX10-NEXT: ; return to shader part epilog
%val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
%cast = bitcast double %val to <2 x float>
ret <2 x float> %cast
>From 5eaabb24b0ca8cb4b7024c44203913a9f361b16f Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Mon, 24 Nov 2025 15:59:20 -0500
Subject: [PATCH 2/2] [AMDGPU][GISel] Fix formatting
---
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 1cf8ca50f21e6..4101b03e455f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -999,7 +999,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
.Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
- addRulesForGOpcs({G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL}, Standard)
+ addRulesForGOpcs({G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL}, Standard)
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
More information about the llvm-commits
mailing list