[llvm] [AMDGPU] Correctly restore FP mode in FDIV32 lowering (PR #66346)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 02:14:28 PDT 2023


https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/66346:

Addresses the FIXME for both DAGISel and GISel.

>From 221985966b3d8e7604784e458baad98386348287 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 14 Sep 2023 11:13:24 +0200
Subject: [PATCH] [AMDGPU] Correctly restore FP mode in FDIV32 lowering

Addresses the FIXME for both DAGISel and GISel.
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   37 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   39 +-
 .../CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll     | 1114 ++++++++++++-----
 llvm/test/CodeGen/AMDGPU/fdiv.ll              |  418 +++++--
 4 files changed, 1192 insertions(+), 416 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b5c746b81b91e32..2fd5f18f90e7435 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4657,6 +4657,10 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
   return true;
 }
 
+static const unsigned SPDenormModeBitField =
+    AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+    (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
+
 // Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
 // to enable denorm mode. When 'Enable' is false, disable denorm mode.
 static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
@@ -4675,11 +4679,6 @@ static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
       .addImm(NewDenormModeValue);
 
   } else {
-    // Select FP32 bit field in mode register.
-    unsigned SPDenormModeBitField = AMDGPU::Hwreg::ID_MODE |
-                                    (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
-                                    (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
-
     B.buildInstr(AMDGPU::S_SETREG_IMM32_B32)
       .addImm(SPDenormMode)
       .addImm(SPDenormModeBitField);
@@ -4723,10 +4722,23 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
                        .setMIFlags(Flags);
   auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
 
+  const bool PreservesDenormals = Mode.FP32Denormals == DenormalMode::getIEEE();
+  const bool HasDynamicDenormals =
+      (Mode.FP32Denormals.Input == DenormalMode::Dynamic) ||
+      (Mode.FP32Denormals.Output == DenormalMode::Dynamic);
+
   // FIXME: Doesn't correctly model the FP mode switch, and the FP operations
   // aren't modeled as reading it.
-  if (Mode.FP32Denormals == DenormalMode::getPreserveSign())
+  Register SavedSPDenormMode;
+  if (!PreservesDenormals) {
+    if (HasDynamicDenormals) {
+      SavedSPDenormMode = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      B.buildInstr(AMDGPU::S_GETREG_B32)
+          .addDef(SavedSPDenormMode)
+          .addImm(SPDenormModeBitField);
+    }
     toggleSPDenormMode(true, B, ST, Mode);
+  }
 
   auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
   auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
@@ -4735,10 +4747,15 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
   auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
   auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
 
-  // FIXME: This mishandles dynamic denormal mode. We need to query the
-  // current mode and restore the original.
-  if (Mode.FP32Denormals == DenormalMode::getPreserveSign())
-    toggleSPDenormMode(false, B, ST, Mode);
+  if (!PreservesDenormals) {
+    if (HasDynamicDenormals) {
+      assert(SavedSPDenormMode);
+      B.buildInstr(AMDGPU::S_SETREG_B32)
+          .addReg(SavedSPDenormMode)
+          .addImm(SPDenormModeBitField);
+    } else
+      toggleSPDenormMode(false, B, ST, Mode);
+  }
 
   auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
                   .addUse(Fma4.getReg(0))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 998904bf08820c0..6f7cec17ab854e4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9589,28 +9589,44 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   const DenormalMode DenormMode = Info->getMode().FP32Denormals;
 
-  const bool HasFP32Denormals = DenormMode != DenormalMode::getPreserveSign();
+  const bool PreservesDenormals = DenormMode == DenormalMode::getIEEE();
+  const bool HasDynamicDenormals =
+      (DenormMode.Input == DenormalMode::Dynamic) ||
+      (DenormMode.Output == DenormalMode::Dynamic);
 
-  if (!HasFP32Denormals) {
+  SDValue SavedDenormMode;
+
+  if (!PreservesDenormals) {
     // Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
     // lowering. The chain dependence is insufficient, and we need glue. We do
     // not need the glue variants in a strictfp function.
 
     SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
 
+    SDValue Glue = DAG.getEntryNode();
+    if (HasDynamicDenormals) {
+      SDNode *GetReg = DAG.getMachineNode(AMDGPU::S_GETREG_B32, SL,
+                                          DAG.getVTList(MVT::i32, MVT::Glue),
+                                          {BitField, Glue});
+      SavedDenormMode = SDValue(GetReg, 0);
+
+      Glue = DAG.getMergeValues(
+          {DAG.getEntryNode(), SDValue(GetReg, 0), SDValue(GetReg, 1)}, SL);
+    }
+
     SDNode *EnableDenorm;
     if (Subtarget->hasDenormModeInst()) {
       const SDValue EnableDenormValue =
           getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
 
-      EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
-                                 DAG.getEntryNode(), EnableDenormValue).getNode();
+      EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, Glue,
+                                 EnableDenormValue)
+                         .getNode();
     } else {
       const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
                                                         SL, MVT::i32);
-      EnableDenorm =
-          DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
-                             {EnableDenormValue, BitField, DAG.getEntryNode()});
+      EnableDenorm = DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
+                                        {EnableDenormValue, BitField, Glue});
     }
 
     SDValue Ops[3] = {
@@ -9640,12 +9656,12 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
   SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
                              NumeratorScaled, Fma3, Flags);
 
-  if (!HasFP32Denormals) {
+  if (!PreservesDenormals) {
     // FIXME: This mishandles dynamic denormal mode. We need to query the
     // current mode and restore the original.
 
     SDNode *DisableDenorm;
-    if (Subtarget->hasDenormModeInst()) {
+    if (!HasDynamicDenormals && Subtarget->hasDenormModeInst()) {
       const SDValue DisableDenormValue = getSPDenormModeValue(
           FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
 
@@ -9653,8 +9669,11 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
                                   Fma4.getValue(1), DisableDenormValue,
                                   Fma4.getValue(2)).getNode();
     } else {
+      assert(HasDynamicDenormals == (bool)SavedDenormMode);
       const SDValue DisableDenormValue =
-          DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
+          HasDynamicDenormals
+              ? SavedDenormMode
+              : DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
 
       DisableDenorm = DAG.getMachineNode(
           AMDGPU::S_SETREG_B32, SL, MVT::Other,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 55b8dd68920975f..9da75b093fc9cb5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -209,12 +209,15 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -225,12 +228,15 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -240,13 +246,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -256,13 +265,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -272,13 +284,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
 ; GFX89-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -288,53 +303,105 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
 ; GFX89-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_f32_dynamic_denorm:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX10-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX10-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_f32_dynamic_denorm:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_f32_dynamic_denorm:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
-; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX11-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX11-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_f32_dynamic_denorm:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_f32_dynamic_denorm:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_f32_dynamic_denorm:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
   %fdiv = fdiv float %a, %b
   ret float %fdiv
 }
@@ -2547,12 +2614,15 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2563,12 +2633,15 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2578,13 +2651,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2594,13 +2670,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2610,13 +2689,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
 ; GFX89-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -2626,53 +2708,105 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
 ; GFX89-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX10-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX10-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
-; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX11-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX11-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
@@ -2996,12 +3130,15 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v2, vcc, v2, v1, v2
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3013,12 +3150,15 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v2, vcc, v2, v1, v2
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3029,13 +3169,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v2, v1, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3046,13 +3189,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v2, v1, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3063,13 +3209,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX89-IEEE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, vcc, v2, v1, v2
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-IEEE-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -3080,56 +3229,111 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX89-FLUSH-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, v2, v1, v2
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
-; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v2
-; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, v2, v1, v2
-; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX10-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v4
-; GFX10-NEXT:    v_mul_f32_e32 v5, v2, v4
-; GFX10-NEXT:    v_fma_f32 v6, -v3, v5, v2
-; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v4
-; GFX10-NEXT:    v_fma_f32 v2, -v3, v5, v2
-; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
-; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_neglhs_f32_dynamic:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v2
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, vcc_lo, v2, v1, v2
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX10-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX10-IEEE-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_neglhs_f32_dynamic:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v2
-; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, v2, v1, v2
-; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
-; GFX11-NEXT:    v_mul_f32_e32 v5, v2, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f32 v6, -v3, v5, v2
-; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f32 v2, -v3, v5, v2
-; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_neglhs_f32_dynamic:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v2
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, vcc_lo, v2, v1, v2
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX10-FLUSH-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_neglhs_f32_dynamic:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v3, null, v1, v1, v2
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v2, vcc_lo, v2, v1, v2
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_neglhs_f32_dynamic:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v3, null, v1, v1, v2
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v2, vcc_lo, v2, v1, v2
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_neglhs_f32_dynamic:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
@@ -3330,12 +3534,15 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, v2, v0
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3347,12 +3554,15 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, v2, v0
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3363,13 +3573,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, v2, v0
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3380,13 +3593,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, v2, v0
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3397,13 +3613,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX89-IEEE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, vcc, v0, v2, v0
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-IEEE-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -3414,56 +3633,111 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
 ; GFX89-FLUSH-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, v0, v2, v0
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v5, v2, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v6, -v3, v5, v2
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_negrhs_f32_dynamic:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
-; GFX10-NEXT:    v_div_scale_f32 v3, s4, v2, v2, v0
-; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, v0, v2, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX10-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v4
-; GFX10-NEXT:    v_mul_f32_e32 v5, v2, v4
-; GFX10-NEXT:    v_fma_f32 v6, -v3, v5, v2
-; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v4
-; GFX10-NEXT:    v_fma_f32 v2, -v3, v5, v2
-; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
-; GFX10-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_negrhs_f32_dynamic:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, s4, v2, v2, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, vcc_lo, v0, v2, v0
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX10-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX10-IEEE-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_negrhs_f32_dynamic:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, v0
-; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, v0, v2, v0
-; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
-; GFX11-NEXT:    v_mul_f32_e32 v5, v2, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f32 v6, -v3, v5, v2
-; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f32 v2, -v3, v5, v2
-; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_negrhs_f32_dynamic:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v3, s4, v2, v2, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, vcc_lo, v0, v2, v0
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX10-FLUSH-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_negrhs_f32_dynamic:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v3, null, v2, v2, v0
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v2, vcc_lo, v0, v2, v0
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_negrhs_f32_dynamic:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v3, null, v2, v2, v0
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v2, vcc_lo, v0, v2, v0
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v4
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v5, v2, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fma_f32 v6, -v3, v5, v2
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fma_f32 v2, -v3, v5, v2
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_negrhs_f32_dynamic:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
@@ -3550,12 +3824,15 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3567,12 +3844,15 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3583,13 +3863,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3600,13 +3883,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3617,13 +3903,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
 ; GFX89-IEEE-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -3634,53 +3923,105 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
 ; GFX89-FLUSH-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
-; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, 0x4640e400, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX10-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v2, v3, v2
-; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v2
-; GFX10-NEXT:    v_fma_f32 v5, -v1, v3, v4
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v2
-; GFX10-NEXT:    v_fma_f32 v1, -v1, v3, v4
-; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
-; GFX10-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_f32_constrhs0_dynamic:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX10-IEEE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, 0x4640e400, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v2
-; GFX11-NEXT:    v_fma_f32 v5, -v1, v3, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v2
-; GFX11-NEXT:    v_fma_f32 v1, -v1, v3, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
-; GFX11-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_f32_constrhs0_dynamic:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX10-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_f32_constrhs0_dynamic:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX11-IEEE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_f32_constrhs0_dynamic:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX11-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_f32_constrhs0_dynamic:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
@@ -3862,12 +4203,15 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3879,12 +4223,15 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3895,13 +4242,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3912,13 +4262,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3929,13 +4282,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
 ; GFX89-IEEE-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -3946,53 +4302,105 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
 ; GFX89-FLUSH-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
-; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, 0x4640e400, v0, 0x4640e400
-; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX10-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v2, v3, v2
-; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v2
-; GFX10-NEXT:    v_fma_f32 v5, -v1, v3, v4
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v2
-; GFX10-NEXT:    v_fma_f32 v1, -v1, v3, v4
-; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
-; GFX10-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_f32_constlhs0_dynamic:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX10-IEEE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, 0x4640e400, v0, 0x4640e400
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v2
-; GFX11-NEXT:    v_fma_f32 v5, -v1, v3, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v2
-; GFX11-NEXT:    v_fma_f32 v1, -v1, v3, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
-; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_f32_constlhs0_dynamic:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX10-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_f32_constlhs0_dynamic:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX11-IEEE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_f32_constlhs0_dynamic:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v2, v4, v2
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v4, v5, v2
+; GFX11-FLUSH-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_f32_constlhs0_dynamic:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
@@ -4173,12 +4581,15 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4189,12 +4600,15 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4204,13 +4618,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4220,13 +4637,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4236,13 +4656,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX89-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -4252,53 +4675,105 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX89-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX10-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX10-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
-; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX11-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX11-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
@@ -4495,12 +4970,15 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-IEEE-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-IEEE-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-IEEE-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4511,12 +4989,15 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FLUSH-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FLUSH-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FLUSH-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4526,13 +5007,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-IEEE-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-IEEE-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-IEEE-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-IEEE-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4542,13 +5026,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-FLUSH-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-FLUSH-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FLUSH-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4558,13 +5045,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX89-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-IEEE-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-IEEE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-IEEE-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-IEEE-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-IEEE-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-IEEE-NEXT:    s_setpc_b64 s[30:31]
@@ -4574,53 +5064,105 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX89-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX89-FLUSH-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX89-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX89-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX89-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX89-FLUSH-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX89-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX89-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX89-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX89-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX89-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX10-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX10-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-IEEE-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
+; GFX10-IEEE:       ; %bb.0:
+; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-IEEE-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-IEEE-NEXT:    s_denorm_mode 15
+; GFX10-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-IEEE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
-; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v4, v5, v3
-; GFX11-NEXT:    v_fma_f32 v6, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v3
-; GFX11-NEXT:    v_fma_f32 v2, -v2, v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v4
-; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-FLUSH-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
+; GFX10-FLUSH:       ; %bb.0:
+; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-FLUSH-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-FLUSH-NEXT:    s_denorm_mode 3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX10-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
+; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX10-FLUSH-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-IEEE-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
+; GFX11-IEEE:       ; %bb.0:
+; GFX11-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-IEEE-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-IEEE-NEXT:    s_denorm_mode 15
+; GFX11-IEEE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-IEEE-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-IEEE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-IEEE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FLUSH-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
+; GFX11-FLUSH:       ; %bb.0:
+; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-FLUSH-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-FLUSH-NEXT:    s_denorm_mode 3
+; GFX11-FLUSH-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v3, v5, v3
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX11-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-FLUSH-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
+; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX11-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX11-FLUSH-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    CF_END
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll
index 415f9087a372dc1..e9643aa65402bbe 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -2416,13 +2416,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2432,13 +2435,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2448,13 +2454,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -2464,13 +2473,16 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -2479,14 +2491,17 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -2495,15 +2510,18 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -2708,13 +2726,16 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2724,13 +2745,16 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -2740,13 +2764,16 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -2756,13 +2783,16 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -2771,14 +2801,17 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -2787,15 +2820,18 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -3257,13 +3293,16 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v6, v4, v4
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -3274,13 +3313,16 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -3291,13 +3333,16 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX7-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX7-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX7-NEXT:    v_fma_f32 v4, v6, v4, v4
 ; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
 ; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -3308,13 +3353,16 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
 ; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
 ; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
 ; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -3324,14 +3372,17 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX10-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v4
 ; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v4
 ; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
 ; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
 ; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -3341,15 +3392,18 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
-; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
 ; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
 ; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
 ; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -3813,13 +3867,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3829,13 +3886,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -3845,13 +3905,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -3861,13 +3924,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -3876,14 +3942,17 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -3892,15 +3961,18 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -4361,13 +4433,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v6, v4, v4
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -4378,13 +4453,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -4395,13 +4473,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX7-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX7-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX7-NEXT:    v_fma_f32 v4, v6, v4, v4
 ; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
 ; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -4412,13 +4493,16 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
 ; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
 ; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
 ; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -4428,14 +4512,17 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
-; GFX10-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v4
 ; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v4
 ; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
 ; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
 ; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -4445,15 +4532,18 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
-; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
 ; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
 ; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
 ; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
 ; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
 ; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
@@ -4919,13 +5009,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4935,13 +5028,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -4951,13 +5047,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -4967,13 +5066,16 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -4982,14 +5084,17 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, -v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -4998,15 +5103,18 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, -v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -5460,13 +5568,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -5476,13 +5587,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -5492,13 +5606,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -5508,13 +5625,16 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -5523,14 +5643,17 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, -v1, -v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -5539,15 +5662,18 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, -v1, -v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -5986,13 +6112,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
 ; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v3, v2, v2
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -6003,13 +6132,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -6020,13 +6152,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
 ; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
 ; GFX7-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -6037,13 +6172,16 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
 ; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
 ; GFX8-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -6052,14 +6190,17 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX10-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v2, v3, v2
 ; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v2, v4, v2
 ; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
 ; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -6068,15 +6209,18 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
+; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
-; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
+; GFX11-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v2, v4, v2
 ; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
 ; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -6474,13 +6618,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
 ; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v3, v2, v2
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -6491,13 +6638,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
 ; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -6508,13 +6658,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
 ; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
 ; GFX7-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
 ; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
 ; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -6525,13 +6678,16 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
 ; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
 ; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
 ; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
 ; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
 ; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -6540,14 +6696,17 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
-; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
-; GFX10-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v2, v3, v2
 ; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v2, v4, v2
 ; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
 ; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -6556,15 +6715,18 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
+; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
-; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
+; GFX11-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v2, v4, v2
 ; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
 ; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
 ; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
 ; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -7005,13 +7167,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -7021,13 +7186,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -7037,13 +7205,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -7053,13 +7224,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -7068,14 +7242,17 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -7084,15 +7261,18 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -7540,13 +7720,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -7556,13 +7739,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
@@ -7572,13 +7758,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
 ; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
+; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
 ; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
 ; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -7588,13 +7777,16 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
 ; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
+; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
 ; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
+; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
 ; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
 ; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
 ; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
 ; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
+; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -7603,14 +7795,17 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
-; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
 ; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
+; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX10-NEXT:    s_denorm_mode 15
+; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
 ; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -7619,15 +7814,18 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
 ; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX11-NEXT:    s_denorm_mode 15
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
+; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
 ; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
 ; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]



More information about the llvm-commits mailing list