[llvm] [AMDGPU] Fix mode register pass for constrained FP operations (PR #90085)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 25 09:56:43 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Abhinav Garg (abhigargrepo)
<details>
<summary>Changes</summary>
This PR will fix the si-mode-register pass which is inserting an extra setreg instruction in case of constrained FP operations. This pass will be ignored for strictfp functions.
---
Full diff: https://github.com/llvm/llvm-project/pull/90085.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+3)
- (modified) llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index c01b1266a5530a..32a889279763a9 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -430,6 +430,9 @@ void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
}
bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (F.hasFnAttribute(llvm::Attribute::StrictFP))
+ return Changed;
BlockInfo.resize(MF.getNumBlockIDs());
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
index 2403aeaa4428ad..edfaa7debe2f84 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
@@ -9,8 +9,7 @@ define double @ignoreStrictfp(double noundef %a, double noundef %b) #0 {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 1
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
+; GCN-NOT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GCN-NEXT: s_setpc_b64 s[30:31]
tail call void @llvm.amdgcn.s.setreg(i32 2177, i32 1)
@@ -24,8 +23,7 @@ define double @set_fpenv(double noundef %a, double noundef %b) #0 {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 4
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0
-; GCN-NEXT: s_nop 0
-; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
+; GCN-NOT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/90085
More information about the llvm-commits
mailing list