[llvm] [AMDGPU][GlobalISel] Add register bank legalization rules for amdgcn_interp_inreg (PR #187248)
Syadus Sefat via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 08:02:09 PDT 2026
https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/187248
>From ac0700400eb503331bde6664a84a265a966562dd Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Wed, 18 Mar 2026 07:01:18 -0500
Subject: [PATCH] [AMDGPU][GlobalISel] Add register bank legalization rules for
amdgcn_interp_inreg
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 ++
.../GlobalISel/llvm.amdgcn.interp.inreg.ll | 153 ++++++++++++++++--
2 files changed, 154 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 42935a1500bc5..a1400be225ca6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1717,4 +1717,15 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
.Any({{DivV4S16}, {{VgprV4S16}, {IntrId, VgprP3}}});
+ addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
+ amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
+ Standard)
+ .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
+ .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
+
+ addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
+ Standard)
+ .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
+ .Div(S16, {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
+
} // end initialize rules
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
index 1ad8490c3f2f4..99da4b425b8ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX11-LABEL: v_interp_f32:
@@ -389,9 +389,15 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v2, v3 wait_exp:7
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
-; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v0
+; GFX12-TRUE16-NEXT: s_cvt_f16_f32 s0, s0
+; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-TRUE16-NEXT: s_add_f16 s0, s0, s1
+; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, s0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-FAKE16-LABEL: v_interp_f16_imm_params:
@@ -401,9 +407,15 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
; GFX12-FAKE16-NEXT: v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v1, v0
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v0
+; GFX12-FAKE16-NEXT: s_cvt_f16_f32 s0, s0
+; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-FAKE16-NEXT: s_add_f16 s0, s0, s1
+; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float 0.0, float %i, float 0.0, i1 0)
@@ -413,6 +425,127 @@ main_body:
ret half %res
}
+define amdgpu_ps i32 @s_interp_p2(float inreg %p, float inreg %j, float inreg %tmp) #0 {
+; GFX11-LABEL: s_interp_p2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_interp_p2_f32 v0, v0, v1, v2 wait_exp:7
+; GFX11-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: s_interp_p2:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_interp_p2_f32 v0, v0, v1, v2 wait_exp:7
+; GFX12-NEXT: v_readfirstlane_b32 s0, v0
+; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT: ; return to shader part epilog
+ %res = call float @llvm.amdgcn.interp.inreg.p2(float %p, float %j, float %tmp)
+ %res_int = bitcast float %res to i32
+ ret i32 %res_int
+}
+
+define amdgpu_ps i32 @s_interp_f32(float inreg %p, float inreg %i, float inreg %j) #0 {
+; GFX11-LABEL: s_interp_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_interp_p10_f32 v1, v0, v1, v0 wait_exp:7
+; GFX11-NEXT: v_interp_p2_f32 v0, v0, v2, v1 wait_exp:7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: s_interp_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_interp_p10_f32 v1, v0, v1, v0 wait_exp:7
+; GFX12-NEXT: v_interp_p2_f32 v0, v0, v2, v1 wait_exp:7
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_readfirstlane_b32 s0, v0
+; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT: ; return to shader part epilog
+ %tmp = call float @llvm.amdgcn.interp.inreg.p10(float %p, float %i, float %p)
+ %res = call float @llvm.amdgcn.interp.inreg.p2(float %p, float %j, float %tmp)
+ %res_int = bitcast float %res to i32
+ ret i32 %res_int
+}
+
+define amdgpu_ps float @s_interp_p10_rtz_f16(float inreg %p, float inreg %i, float inreg %j) #0 {
+; GFX11-TRUE16-LABEL: s_interp_p10_rtz_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_interp_p10_rtz_f16_f32 v0, v0.l, v1, v0.l wait_exp:7
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_interp_p10_rtz_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_interp_p10_rtz_f16_f32 v0, v0, v1, v0 wait_exp:7
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: s_interp_p10_rtz_f16:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_interp_p10_rtz_f16_f32 v0, v0.l, v1, v0.l wait_exp:7
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: s_interp_p10_rtz_f16:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_interp_p10_rtz_f16_f32 v0, v0, v1, v0 wait_exp:7
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
+ %res = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p, float %i, float %p, i1 0)
+ ret float %res
+}
+
+define amdgpu_ps half @s_interp_rtz_f16(float inreg %p, float inreg %j, float inreg %tmp) #0 {
+; GFX11-TRUE16-LABEL: s_interp_rtz_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_interp_p2_rtz_f16_f32 v0.l, v0.l, v1, v2 wait_exp:7
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_interp_rtz_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_interp_p2_rtz_f16_f32 v0, v0, v1, v2 wait_exp:7
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: s_interp_rtz_f16:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_interp_p2_rtz_f16_f32 v0.l, v0.l, v1, v2 wait_exp:7
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: s_interp_rtz_f16:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-FAKE16-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_interp_p2_rtz_f16_f32 v0, v0, v1, v2 wait_exp:7
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
+ %res = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p, float %j, float %tmp, i1 0)
+ ret half %res
+}
+
declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
More information about the llvm-commits mailing list