[llvm] [AMDGPU] Fix mode register pass for constrained FP operations (PR #90085)

Mon Apr 29 22:48:15 PDT 2024

https://github.com/abhigargrepo updated https://github.com/llvm/llvm-project/pull/90085

>From 341ac99596306fef89390eb9d4bb02f5dc35c669 Mon Sep 17 00:00:00 2001
From: Abhinav <abhinav.garg at amd.com>
Date: Thu, 25 Apr 2024 22:16:51 +0530
Subject: [PATCH 1/3] [AMDGPU] Fix mode register pass for constrained FP
 operations

This PR fixes the si-mode-register pass, which inserts an extra setreg instruction
in the case of constrained FP operations. The pass is now skipped for strictfp functions.
---
 llvm/lib/Target/AMDGPU/SIModeRegister.cpp             | 3 +++
 llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll | 6 ++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index c01b1266a5530a..32a889279763a9 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -430,6 +430,9 @@ void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
 }
 
 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  if (F.hasFnAttribute(llvm::Attribute::StrictFP))
+    return Changed;
   BlockInfo.resize(MF.getNumBlockIDs());
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
index 2403aeaa4428ad..edfaa7debe2f84 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
@@ -9,8 +9,7 @@ define double @ignoreStrictfp(double noundef %a, double noundef %b) #0 {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 1
-; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
+; GCN-NOT:     s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
 ; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   tail call void @llvm.amdgcn.s.setreg(i32 2177, i32 1)
@@ -24,8 +23,7 @@ define double @set_fpenv(double noundef %a, double noundef %b) #0 {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 4
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0
-; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
+; GCN-NOT:     s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
 ; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:

>From 3f5392de2d4322f5a6d2fc973c218f59fadb34c6 Mon Sep 17 00:00:00 2001
From: Abhinav <abhinav.garg at amd.com>
Date: Fri, 26 Apr 2024 01:26:52 +0530
Subject: [PATCH 2/3] [AMDGPU] Fix mode register pass for constrained FP
 operations

This PR fixes the si-mode-register pass, which inserts an extra setreg instruction
in the case of constrained FP operations. The pass is now skipped for strictfp functions.
---
 llvm/lib/Target/AMDGPU/SIModeRegister.cpp             | 4 ++++
 llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 32a889279763a9..44d72f02d04bf4 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -430,6 +430,10 @@ void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
 }
 
 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
+  // This pass should not modify the rounding mode in case 
+  // of constrained FP operations with dynamic rounding modes.
+  // As per llvm lang ref, functions with such constrained
+  // operations must have strictfp attribute.
   const Function &F = MF.getFunction();
   if (F.hasFnAttribute(llvm::Attribute::StrictFP))
     return Changed;
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
index edfaa7debe2f84..8a29229c152fe8 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fpconstrain.ll
@@ -9,7 +9,6 @@ define double @ignoreStrictfp(double noundef %a, double noundef %b) #0 {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 1
-; GCN-NOT:     s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
 ; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   tail call void @llvm.amdgcn.s.setreg(i32 2177, i32 1)
@@ -23,7 +22,6 @@ define double @set_fpenv(double noundef %a, double noundef %b) #0 {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 4
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0
-; GCN-NOT:     s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
 ; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:

>From a7f5bca135990c8a69b2d02a17808f7b241745f2 Mon Sep 17 00:00:00 2001
From: Abhinav <abhinav.garg at amd.com>
Date: Tue, 30 Apr 2024 11:15:13 +0530
Subject: [PATCH 3/3] [AMDGPU] Fix mode register pass for constrained FP
 operations

This PR fixes the si-mode-register pass, which inserts an extra setreg instruction in the case of constrained FP operations.
The pass is now skipped for strictfp functions.
---
 llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 44d72f02d04bf4..e7f448233ca347 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -430,10 +430,11 @@ void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
 }
 
 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
-  // This pass should not modify the rounding mode in case 
-  // of constrained FP operations with dynamic rounding modes.
-  // As per llvm lang ref, functions with such constrained
-  // operations must have strictfp attribute.
+  // Constrained FP intrinsics are used to support non-default rounding modes.
+  // The strictfp attribute is required to mark functions with strict FP
+  // semantics that contain constrained FP intrinsics. This pass fixes up
+  // operations that use a non-default rounding mode in non-strictfp functions,
+  // but it must not assume or modify the rounding mode in strictfp functions.
   const Function &F = MF.getFunction();
   if (F.hasFnAttribute(llvm::Attribute::StrictFP))
     return Changed;