[llvm-branch-commits] [llvm] AMDGPU: Skip last corrections and scaling for afn llvm.sqrt.f64 (PR #183697)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Feb 26 23:46:39 PST 2026


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/183697

Device libs have a fast sqrt macro implemented this way.

>From 4708508e561a04583cc5921c648fdfe948f22d0e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 27 Feb 2026 08:06:08 +0100
Subject: [PATCH] AMDGPU: Skip last corrections and scaling for afn
 llvm.sqrt.f64

Device libs have a fast sqrt macro implemented this way.
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  39 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  40 +-
 llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll         | 840 ++++--------------
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 768 +++-------------
 4 files changed, 368 insertions(+), 1319 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c117c6bf4ddd9..1b9028eb41487 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5856,18 +5856,21 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
   Register X = MI.getOperand(1).getReg();
   unsigned Flags = MI.getFlags();
 
-  auto ScaleConstant = B.buildFConstant(F64, 0x1.0p-767);
+  Register SqrtX = X;
+  Register Scaling, ZeroInt;
+  if (!MI.getFlag(MachineInstr::FmAfn)) {
+    auto ScaleConstant = B.buildFConstant(F64, 0x1.0p-767);
 
-  auto ZeroInt = B.buildConstant(S32, 0);
-  auto Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, S1, X, ScaleConstant);
+    ZeroInt = B.buildConstant(S32, 0).getReg(0);
+    Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, S1, X, ScaleConstant).getReg(0);
 
-  // Scale up input if it is too small.
-  auto ScaleUpFactor = B.buildConstant(S32, 256);
-  auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
-  auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
+    // Scale up input if it is too small.
+    auto ScaleUpFactor = B.buildConstant(S32, 256);
+    auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
+    SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags).getReg(0);
+  }
 
-  auto SqrtY =
-      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX.getReg(0));
+  auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX);
 
   auto Half = B.buildFConstant(F64, 0.5);
   auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
@@ -5884,15 +5887,17 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
 
   auto SqrtS2 = B.buildFMA(F64, SqrtD0, SqrtH1, SqrtS1);
 
-  auto NegSqrtS2 = B.buildFNeg(F64, SqrtS2);
-  auto SqrtD1 = B.buildFMA(F64, NegSqrtS2, SqrtS2, SqrtX);
-
-  auto SqrtRet = B.buildFMA(F64, SqrtD1, SqrtH1, SqrtS2);
+  Register SqrtRet = SqrtS2.getReg(0);
+  if (!MI.getFlag(MachineInstr::FmAfn)) {
+    auto NegSqrtS2 = B.buildFNeg(F64, SqrtS2);
+    auto SqrtD1 = B.buildFMA(F64, NegSqrtS2, SqrtS2, SqrtX);
+    auto SqrtD2 = B.buildFMA(F64, SqrtD1, SqrtH1, SqrtS2);
 
-  // Scale down the result.
-  auto ScaleDownFactor = B.buildConstant(S32, -128);
-  auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
-  SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags);
+    // Scale down the result.
+    auto ScaleDownFactor = B.buildConstant(S32, -128);
+    auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
+    SqrtRet = B.buildFLdexp(F64, SqrtD2, ScaleDown, Flags).getReg(0);
+  }
 
   Register IsZeroOrInf;
   if (MI.getFlag(MachineInstr::FmNoInfs)) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1e0ba25158ff4..cd8575751220b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13182,17 +13182,20 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
 
   SDValue X = Op.getOperand(0);
-  SDValue ScaleConstant = DAG.getConstantFP(0x1.0p-767, DL, MVT::f64);
-
-  SDValue Scaling = DAG.getSetCC(DL, MVT::i1, X, ScaleConstant, ISD::SETOLT);
-
   SDValue ZeroInt = DAG.getConstant(0, DL, MVT::i32);
 
-  // Scale up input if it is too small.
-  SDValue ScaleUpFactor = DAG.getConstant(256, DL, MVT::i32);
-  SDValue ScaleUp =
-      DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleUpFactor, ZeroInt);
-  SDValue SqrtX = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, X, ScaleUp, Flags);
+  SDValue SqrtX = X;
+  SDValue Scaling;
+  if (!Flags.hasApproximateFuncs()) {
+    SDValue ScaleConstant = DAG.getConstantFP(0x1.0p-767, DL, MVT::f64);
+    Scaling = DAG.getSetCC(DL, MVT::i1, X, ScaleConstant, ISD::SETOLT);
+
+    // Scale up input if it is too small.
+    SDValue ScaleUpFactor = DAG.getConstant(256, DL, MVT::i32);
+    SDValue ScaleUp =
+        DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleUpFactor, ZeroInt);
+    SqrtX = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, X, ScaleUp, Flags);
+  }
 
   SDValue SqrtY = DAG.getNode(AMDGPUISD::RSQ, DL, MVT::f64, SqrtX);
 
@@ -13214,16 +13217,19 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue SqrtS2 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD0, SqrtH1, SqrtS1);
 
-  SDValue NegSqrtS2 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS2);
-  SDValue SqrtD1 =
-      DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS2, SqrtS2, SqrtX);
+  SDValue SqrtRet = SqrtS2;
+  if (!Flags.hasApproximateFuncs()) {
+    SDValue NegSqrtS2 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS2);
+    SDValue SqrtD1 =
+        DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS2, SqrtS2, SqrtX);
 
-  SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
+    SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
 
-  SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32);
-  SDValue ScaleDown =
-      DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt);
-  SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
+    SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32);
+    SDValue ScaleDown = DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling,
+                                    ScaleDownFactor, ZeroInt);
+    SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
+  }
 
   // TODO: Check for DAZ and expand to subnormals
 
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 7cdf08800cb25..cf1cabd6d62ed 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1066,116 +1066,80 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
 define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
 ; GFX6-SDAG-LABEL: s_sqrt_f64_afn:
 ; GFX6-SDAG:       ; %bb.0:
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX6-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX6-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; GFX6-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v6, 0x260
+; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v7, s1
+; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX6-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX6-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-SDAG-LABEL: s_sqrt_f64_afn:
 ; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX8-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX8-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; GFX8-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v6, 0x260
+; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v7, s1
+; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX8-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX8-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX6-GISEL-LABEL: s_sqrt_f64_afn:
 ; GFX6-GISEL:       ; %bb.0:
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v6, 0x260
+; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v7, s0
+; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT:    v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, s1
+; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX6-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX6-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-GISEL-LABEL: s_sqrt_f64_afn:
 ; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v6, 0x260
+; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v7, s0
+; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT:    v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX8-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX8-GISEL-NEXT:    ; return to shader part epilog
@@ -1193,112 +1157,76 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
 define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
 ; GFX6-SDAG-LABEL: s_sqrt_f64_afn_nnan_ninf:
 ; GFX6-SDAG:       ; %bb.0:
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX6-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX6-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; GFX6-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v6, s1
+; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX6-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX6-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-SDAG-LABEL: s_sqrt_f64_afn_nnan_ninf:
 ; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX8-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX8-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; GFX8-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v6, s1
+; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX8-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX8-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX6-GISEL-LABEL: s_sqrt_f64_afn_nnan_ninf:
 ; GFX6-GISEL:       ; %bb.0:
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v6, s0
+; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT:    v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, s1
+; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX6-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX6-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-GISEL-LABEL: s_sqrt_f64_afn_nnan_ninf:
 ; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v6, s0
+; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT:    v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX8-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX8-GISEL-NEXT:    ; return to shader part epilog
@@ -1657,27 +1585,16 @@ define double @v_sqrt_f64_afn(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64_afn:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1685,27 +1602,16 @@ define double @v_sqrt_f64_afn(double %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_f64_afn:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1713,13 +1619,9 @@ define double @v_sqrt_f64_afn(double %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_f64_afn:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1727,13 +1629,6 @@ define double @v_sqrt_f64_afn(double %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1741,13 +1636,9 @@ define double @v_sqrt_f64_afn(double %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_f64_afn:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1755,13 +1646,6 @@ define double @v_sqrt_f64_afn(double %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1773,27 +1657,16 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nsz:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1801,27 +1674,16 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nsz:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1829,13 +1691,9 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nsz:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1843,13 +1701,6 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1857,13 +1708,9 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nsz:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1871,13 +1718,6 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1889,17 +1729,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_v2f64_afn:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v16, 0x260
+; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX6-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v16
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -1912,20 +1746,8 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX6-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -1935,17 +1757,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_v2f64_afn:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v16, 0x260
+; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX8-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v16
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -1958,20 +1774,8 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX8-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -1981,18 +1785,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_v2f64_afn:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v16, 0x260
+; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX6-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v16
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2007,18 +1804,6 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX6-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2028,18 +1813,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_v2f64_afn:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v16, 0x260
+; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX8-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v16
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2054,18 +1832,6 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX8-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2079,27 +1845,16 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nnan:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2107,27 +1862,16 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nnan:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2135,13 +1879,9 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nnan:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2149,13 +1889,6 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2163,13 +1896,9 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nnan:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v8, 0x260
+; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2177,13 +1906,6 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2195,26 +1917,16 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
+; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], |v[0:1]|, v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], |v[0:1]|
+; GFX6-SDAG-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2222,26 +1934,16 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
+; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], |v[0:1]|, v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], |v[0:1]|
+; GFX8-SDAG-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2249,26 +1951,16 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], |v[0:1]|
+; GFX6-GISEL-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2276,26 +1968,16 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], |v[0:1]|
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2308,26 +1990,15 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2335,26 +2006,15 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2362,13 +2022,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2376,12 +2031,6 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2389,13 +2038,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2403,12 +2047,6 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2420,17 +2058,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -2443,19 +2074,8 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2465,17 +2085,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -2488,19 +2101,8 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2510,18 +2112,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2536,17 +2130,6 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2556,18 +2139,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2582,17 +2157,6 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2606,26 +2170,15 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
 ; GFX6-SDAG:       ; %bb.0:
 ; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX6-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX6-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2633,26 +2186,15 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
 ; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT:    s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; GFX8-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; GFX8-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2660,13 +2202,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
 ; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
 ; GFX6-GISEL:       ; %bb.0:
 ; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX6-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX6-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2674,12 +2211,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2687,13 +2218,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
 ; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX8-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; GFX8-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2701,12 +2227,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 43bfe73515adb..238cff87f2d5f 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -3567,15 +3567,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -3583,12 +3577,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -3610,15 +3599,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -3627,11 +3610,6 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -3653,27 +3631,16 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -3692,13 +3659,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3706,13 +3669,6 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -4017,27 +3973,16 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4052,15 +3997,9 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_f64__afn:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4068,11 +4007,6 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4087,27 +4021,16 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_f64__afn:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4122,13 +4045,9 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_f64__afn:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4136,13 +4055,6 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4230,27 +4142,16 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4266,15 +4167,9 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4282,11 +4177,6 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4301,27 +4191,16 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4337,13 +4216,9 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4351,13 +4226,6 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4429,27 +4297,15 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4464,15 +4320,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4480,11 +4329,6 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4499,27 +4343,15 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4534,13 +4366,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4548,13 +4375,6 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4642,27 +4462,16 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4677,15 +4486,9 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4693,11 +4496,6 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4712,27 +4510,16 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4747,13 +4534,9 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4761,13 +4544,6 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4839,27 +4615,15 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4874,15 +4638,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4890,11 +4647,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4909,27 +4661,15 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4944,13 +4684,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4958,13 +4693,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5036,27 +4764,15 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5072,15 +4788,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -5088,11 +4797,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5107,27 +4811,15 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5143,13 +4835,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -5157,13 +4844,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5243,7 +4923,6 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -5256,7 +4935,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
 ; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -5285,9 +4964,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
 ; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -5299,7 +4977,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
 ; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -5338,9 +5016,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -5377,9 +5054,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -5480,45 +5156,26 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v12, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v15, 0x260
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[8:9], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[8:9], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-CG-NEXT:    v_mul_f64 v[6:7], v[0:1], v[8:9]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 0.5
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[8:9], 0.5
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v12
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[12:13], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[14:15], v[4:5], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[10:11], v[12:13]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
@@ -5540,46 +5197,26 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v5, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v6, 8, v12
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[10:11], 0.5
-; SI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v13, 0x260
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[4:5], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[12:13], v[10:11], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[10:11], v[2:3], v[10:11]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[12:13], v[10:11], 0.5
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[10:11]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[4:5]
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[2:3]
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
@@ -5601,17 +5238,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; VI-SDAG-CG-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
@@ -5624,20 +5254,8 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
@@ -5661,18 +5279,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v5, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; VI-GISEL-CG-NEXT:    v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
@@ -5687,18 +5297,6 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -5806,29 +5404,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ;
 ; SI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
 ; SI-SDAG-CG:       ; %bb.0:
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v6, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v7, s1
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[0:1], v[0:1], 0.5
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, s0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -5842,29 +5431,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ;
 ; SI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
 ; SI-GISEL-CG:       ; %bb.0:
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v6, 0x260
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v7, s0
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[0:1], s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, s1
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -5878,29 +5458,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ;
 ; VI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
 ; VI-SDAG-CG:       ; %bb.0:
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x260
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v6, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v7, s1
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[0:1], v[0:1], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, s0
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -5914,29 +5485,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ;
 ; VI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
 ; VI-GISEL-CG:       ; %bb.0:
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v6, 0x260
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v6
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v7, s0
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[0:1], s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, s1
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -6031,27 +5593,16 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; SI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
 ; SI-SDAG-CG:       ; %bb.0:
 ; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -6066,15 +5617,9 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; SI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
 ; SI-GISEL-CG:       ; %bb.0:
 ; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -6082,11 +5627,6 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -6101,27 +5641,16 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; VI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
 ; VI-SDAG-CG:       ; %bb.0:
 ; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
@@ -6136,13 +5665,9 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; VI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
 ; VI-GISEL-CG:       ; %bb.0:
 ; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -6150,13 +5675,6 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
 ; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]



More information about the llvm-branch-commits mailing list