[llvm] [AMDGPU] Use RTZ for newer fp16 interp instructions (PR #86235)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 20:08:06 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ruiling, Song (ruiling)
<details>
<summary>Changes</summary>
They need to be run under RTZ like their predecessors to work correctly.
---
Full diff: https://github.com/llvm/llvm-project/pull/86235.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/VINTERPInstructions.td (+2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll (+5-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll (+5-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index c01b1266a5530a..b7e27e4ea3738b 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -170,6 +170,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
case AMDGPU::V_INTERP_P2_F16:
+ case AMDGPU::V_INTERP_P10_F16_F32_inreg:
+ case AMDGPU::V_INTERP_P2_F16_F32_inreg:
// f16 interpolation instructions need double precision round to zero
return Status(FP_ROUND_MODE_DP(3),
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
index 77063e2b70f66c..e04d686257ad90 100644
--- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -110,10 +110,12 @@ let SubtargetPredicate = HasVINTERPEncoding in {
let Uses = [M0, EXEC, MODE] in {
def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>;
def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>;
+let FPDPRounding = 1 in {
def V_INTERP_P10_F16_F32_inreg :
VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
def V_INTERP_P2_F16_F32_inreg :
VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
+} // End FPDPRounding = 1
} // Uses = [M0, EXEC, MODE]
let Uses = [M0, EXEC] in {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
index 623360f6b1d9c5..b0603c622b80e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
@@ -129,12 +129,14 @@ define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m
; GCN-NEXT: s_mov_b32 exec_lo, s3
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GCN-NEXT: v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
; GCN-NEXT: v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
; GCN-NEXT: v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GCN-NEXT: v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GCN-NEXT: v_add_f16_e32 v0, v3, v0
; GCN-NEXT: ; return to shader part epilog
main_body:
@@ -152,9 +154,11 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GCN-NEXT: v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
; GCN-NEXT: v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-NEXT: v_cvt_f16_f32_e32 v1, v1
; GCN-NEXT: v_add_f16_e32 v0, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
index 429528e9091d13..cf99b83e961b7e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
@@ -129,12 +129,14 @@ define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m
; GCN-NEXT: s_mov_b32 exec_lo, s3
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GCN-NEXT: v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
; GCN-NEXT: v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
; GCN-NEXT: v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GCN-NEXT: v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GCN-NEXT: v_add_f16_e32 v0, v3, v0
; GCN-NEXT: ; return to shader part epilog
main_body:
@@ -152,9 +154,11 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GCN-NEXT: v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
; GCN-NEXT: v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-NEXT: v_cvt_f16_f32_e32 v1, v1
; GCN-NEXT: v_add_f16_e32 v0, v1, v0
``````````
</details>
https://github.com/llvm/llvm-project/pull/86235
More information about the llvm-commits
mailing list