[llvm] AMDGPU: Introduce f64 rsq pattern in AMDGPUCodeGenPrepare (PR #172053)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 22 04:51:16 PST 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/172053

From fd4470f67cb0bad2a3ef0cfec3fae9f5252674c5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 12 Dec 2025 15:44:58 +0100
Subject: [PATCH 01/10] AMDGPU: Introduce f64 rsq pattern in
 AMDGPUCodeGenPrepare

Handle this in AMDGPUCodeGenPrepare instead of DAGCombine, mostly
because the f32 case is already handled here due to its dependency on
!fpmath metadata. We can also take advantage of computeKnownFPClass to
elide the edge-case checks.
---
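For reference, the correction is the standard second-order
Newton/Householder refinement of an initial estimate y0 ~= x^(-1/2).
Writing the estimate's error as e = 1 - x*y0^2, a sketch of the
derivation in LaTeX form:

  x^{-1/2} = y_0 (1 - e)^{-1/2}
           = y_0 \left(1 + \tfrac{1}{2}e + \tfrac{3}{8}e^2 + O(e^3)\right)
           \approx y_0 + (y_0 e)\left(\tfrac{1}{2} + \tfrac{3}{8}e\right)

which is exactly the emitted fma(y0*e, fma(e, 0.375, 0.5), y0). Below is
a minimal host-side C++ sketch of the sequence, ignoring denormal-mode
handling and using 1.0/std::sqrt as a stand-in for the v_rsq_f64
estimate; the name rsq_f64_ref is illustrative and assumes a correctly
rounded std::fma:

  #include <cmath>

  double rsq_f64_ref(double x) {
    double y0 = 1.0 / std::sqrt(x); // stand-in for the v_rsq_f64 estimate
    // Substitute y0 for x on the special inputs so the inf/zero results
    // survive the fmas instead of becoming nan.
    double s = (x == 0.0 || std::isinf(x)) ? y0 : x;
    double e = std::fma(-y0 * s, y0, 1.0); // e = 1 - x*y0^2
    double t = std::fma(e, 0.375, 0.5);    // 1/2 + 3/8 e
    return std::fma(y0 * e, t, y0);        // y0 + y0*e*(1/2 + 3/8 e)
  }

Since the hardware estimate is already close, e is small and a single
second-order step suffices for the ~1 ulp target.
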
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |   141 +-
 .../AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll  |   365 +-
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 10091 +++++++++-------
 3 files changed, 6154 insertions(+), 4443 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 71ea9ef6fc050..e45d0652a65ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -236,6 +236,9 @@ class AMDGPUCodeGenPrepareImpl
                       FastMathFlags FMF) const;
   Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
                           FastMathFlags FMF) const;
+  Value *emitRsqF64(IRBuilder<> &Builder, Value *X, FastMathFlags SqrtFMF,
+                    FastMathFlags DivFMF, const Instruction *CtxI,
+                    bool IsNegative) const;
 
   bool tryNarrowMathIfNoOverflow(Instruction *I);
 
@@ -605,6 +608,94 @@ static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
   return Builder.CreateFMul(Rsq, OutputScaleFactor);
 }
 
+/// Emit an inverse square root expansion for f64 with a correction sequence
+/// on top of v_rsq_f64. This should give a 1 ulp result.
+Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
+                                            FastMathFlags SqrtFMF,
+                                            FastMathFlags DivFMF,
+                                            const Instruction *CtxI,
+                                            bool IsNegative) const {
+  // rsq(x):
+  //   double y0 = BUILTIN_AMDGPU_RSQRT_F64(x);
+  //   double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0, 1.0);
+  //   return MATH_MAD(y0*e, MATH_MAD(e, 0.375, 0.5), y0);
+  //
+  // The rsq instruction handles the special cases correctly. We only need
+  // to check for the edge-case inputs to ensure the special result
+  // propagates through the later correction instructions.
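+  //
+  // Concretely: for x == +0.0, y0 = rsq(x) = +inf, and feeding x itself
+  // into the correction would compute -y0 * x = -inf * +0.0 = nan.
+  // Substituting y0 keeps the chain at +/-inf, so the final fma still
+  // yields +inf. Similarly for x == +inf (y0 = +0.0), the correction
+  // terms are all zero and the result folds back to +0.0.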
+
+  Value *Y0 = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, X);
+
+  // Try to elide the edge case check.
+  //
+  // Fast math flags imply:
+  //   sqrt ninf => !isinf(x)
+  //   sqrt nnan => not helpful (x may still be 0 or +inf)
+  //   fdiv ninf => x != 0, !isinf(x)
+  //   fdiv nnan => x != 0
+  bool MaybePosInf = !SqrtFMF.noInfs() && !DivFMF.noInfs();
+  bool MaybeZero = !DivFMF.noInfs() && !DivFMF.noNaNs();
+
+  DenormalMode DenormMode;
+  FPClassTest Interested = fcNone;
+  if (MaybeZero)
+    Interested |= fcZero;
+  if (MaybePosInf)
+    Interested |= fcPosInf;
+
+  if (Interested != fcNone) {
+    KnownFPClass KnownSrc = computeKnownFPClass(X, Interested, CtxI);
+    if (KnownSrc.isKnownNeverPosInfinity())
+      MaybePosInf = false;
+
+    DenormMode = F.getDenormalMode(X->getType()->getFltSemantics());
+    if (KnownSrc.isKnownNeverLogicalZero(DenormMode))
+      MaybeZero = false;
+  }
+
+  Value *SpecialOrRsq = Y0;
+  if (MaybeZero || MaybePosInf) {
+    Value *Cond;
+    if (MaybePosInf && MaybeZero) {
+      if (DenormMode.Input != DenormalMode::DenormalModeKind::Dynamic) {
+        FPClassTest TestMask = fcPosInf | fcZero;
+        if (DenormMode.inputsAreZero())
+          TestMask |= fcSubnormal;
+
+        Cond = Builder.createIsFPClass(X, TestMask);
+      } else {
+        // Avoid using llvm.is.fpclass for dynamic denormal mode, since the
+        // class test doesn't respect the floating-point environment and
+        // would miss subnormal inputs that the runtime mode flushes to zero.
+        Value *IsZero =
+            Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+        Value *IsInf =
+            Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+        Cond = Builder.CreateOr(IsZero, IsInf);
+      }
+    } else if (MaybeZero) {
+      Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+    } else {
+      Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+    }
+
+    SpecialOrRsq = Builder.CreateSelect(Cond, Y0, X);
+  }
+
+  Value *NegY0 = Builder.CreateFNeg(Y0);
+  Value *NegXY0 = Builder.CreateFMul(NegY0, SpecialOrRsq);
+
+  // Could be fmuladd, but isFMAFasterThanFMulAndFAdd is always true for f64.
+  Value *E = Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));
+  Value *Y0E = Builder.CreateFMul(Y0, E);
+
+  Value *EFMA = Builder.CreateFMA(E, ConstantFP::get(X->getType(), 0.375),
+                                  ConstantFP::get(X->getType(), 0.5));
+  if (IsNegative)
+    EFMA = Builder.CreateFNeg(EFMA);
+
+  return Builder.CreateFMA(Y0E, EFMA, Y0);
+}
+
 bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
                                                   FastMathFlags DivFMF,
                                                   FastMathFlags SqrtFMF) const {
@@ -612,8 +703,22 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
   if (!DivFMF.allowContract() || !SqrtFMF.allowContract())
     return false;
 
-  // v_rsq_f32 gives 1ulp
-  return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+  Type *EltTy = SqrtOp->getType()->getScalarType();
+  switch (EltTy->getTypeID()) {
+  case Type::FloatTyID:
+    // v_rsq_f32 gives 1 ulp.
+    // Separate correctly rounded fdiv + sqrt give ~1.81 ulp.
+
+    // FIXME: rsq formation should not depend on approx func or the fpmath
+    // accuracy. This strictly improves precision.
+    return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+  case Type::DoubleTyID:
+    return true;
+  default:
+    return false;
+  }
+
+  llvm_unreachable("covered switch");
 }
 
 Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
@@ -629,8 +734,6 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
   if (!CLHS)
     return nullptr;
 
-  assert(Den->getType()->isFloatTy());
-
   bool IsNegative = false;
 
   // TODO: Handle other numerator values with arcp.
@@ -639,14 +742,20 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
     IRBuilder<>::FastMathFlagGuard Guard(Builder);
     Builder.setFastMathFlags(DivFMF | SqrtFMF);
 
-    if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
-        canIgnoreDenormalInput(Den, CtxI)) {
-      Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
-      // -1.0 / sqrt(x) -> fneg(rsq(x))
-      return IsNegative ? Builder.CreateFNeg(Result) : Result;
+    if (Den->getType()->isFloatTy()) {
+      if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
+          canIgnoreDenormalInput(Den, CtxI)) {
+        Value *Result =
+            Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
+        // -1.0 / sqrt(x) -> fneg(rsq(x))
+        return IsNegative ? Builder.CreateFNeg(Result) : Result;
+      }
+
+      return emitRsqIEEE1ULP(Builder, Den, IsNegative);
     }
 
-    return emitRsqIEEE1ULP(Builder, Den, IsNegative);
+    if (Den->getType()->isDoubleTy())
+      return emitRsqF64(Builder, Den, SqrtFMF, DivFMF, CtxI, IsNegative);
   }
 
   return nullptr;
@@ -758,6 +867,9 @@ Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
       return Rsq;
   }
 
+  if (!Num->getType()->isFloatTy())
+    return nullptr;
+
   Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
   if (Rcp)
     return Rcp;
@@ -793,7 +905,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
     return false;
 
   Type *Ty = FDiv.getType()->getScalarType();
-  if (!Ty->isFloatTy())
+  const bool IsFloat = Ty->isFloatTy();
+  if (!IsFloat && !Ty->isDoubleTy())
     return false;
 
   // The f64 rcp/rsq approximations are pretty inaccurate. We can do an
@@ -818,6 +931,10 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
       RsqOp = SqrtOp->getOperand(0);
   }
 
+  // rcp path not yet implemented for f64.
+  if (!IsFloat && !RsqOp)
+    return false;
+
   // Inaccurate rcp is allowed with afn.
   //
   // Defer to codegen to handle this.
@@ -832,7 +949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
     return false;
 
   // Defer the correct implementations to codegen.
-  if (ReqdAccuracy < 1.0f)
+  if (IsFloat && ReqdAccuracy < 1.0f)
     return false;
 
   IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index b97cd91f2ab32..764b10a7d1987 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -4,8 +4,15 @@
 define double @rsq_f64(double %x) {
 ; CHECK-LABEL: define double @rsq_f64(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -16,8 +23,16 @@ define double @rsq_f64(double %x) {
 define double @neg_rsq_f64(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP9:%.*]] = fneg contract double [[TMP8]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -28,8 +43,15 @@ define double @neg_rsq_f64(double %x) {
 define double @rsq_f64_nnan(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -40,8 +62,16 @@ define double @rsq_f64_nnan(double %x) {
 define double @neg_rsq_f64_nnan(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_nnan(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP9:%.*]] = fneg nnan contract double [[TMP8]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -52,8 +82,13 @@ define double @neg_rsq_f64_nnan(double %x) {
 define double @rsq_f64_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -64,8 +99,14 @@ define double @rsq_f64_ninf(double %x) {
 define double @neg_rsq_f64_ninf(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP7:%.*]] = fneg ninf contract double [[TMP6]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -76,8 +117,13 @@ define double @neg_rsq_f64_ninf(double %x) {
 define double @rsq_f64_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -88,8 +134,14 @@ define double @rsq_f64_nnan_ninf(double %x) {
 define double @neg_rsq_f64_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP7:%.*]] = fneg nnan ninf contract double [[TMP6]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -100,8 +152,15 @@ define double @neg_rsq_f64_nnan_ninf(double %x) {
 define double @rsq_f64_sqrt_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_sqrt_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -112,8 +171,13 @@ define double @rsq_f64_sqrt_nnan_ninf(double %x) {
 define double @rsq_f64_fdiv_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_fdiv_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -125,7 +189,30 @@ define <2 x double> @rsq_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @rsq_v2f64(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
 ; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract <2 x double> splat (double 1.000000e+00), [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT:    [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
+; CHECK-NEXT:    [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = fneg contract double [[TMP14]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP22:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP14]])
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT:    [[FDIV:%.*]] = insertelement <2 x double> [[TMP23]], double [[TMP22]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[FDIV]]
 ;
   %sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -137,7 +224,30 @@ define <2 x double> @neg_rsq_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @neg_rsq_v2f64(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
 ; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract <2 x double> splat (double 1.000000e+00), [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT:    [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
+; CHECK-NEXT:    [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = fneg contract double [[TMP14]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP22:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP14]])
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT:    [[FDIV:%.*]] = insertelement <2 x double> [[TMP23]], double [[TMP22]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[FDIV]]
 ;
   %sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -149,7 +259,31 @@ define <2 x double> @mixed_sign_rsq_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @mixed_sign_rsq_v2f64(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
 ; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract <2 x double> <double 1.000000e+00, double -1.000000e+00>, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT:    [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
+; CHECK-NEXT:    [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = fneg contract double [[TMP14]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP22:%.*]] = fneg contract double [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP22]], double [[TMP14]])
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT:    [[FDIV:%.*]] = insertelement <2 x double> [[TMP24]], double [[TMP23]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[FDIV]]
 ;
   %sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -161,7 +295,22 @@ define <2 x double> @rsq_some_elements_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @rsq_some_elements_v2f64(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
 ; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract <2 x double> <double 1.000000e+00, double 2.000000e+00>, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT:    [[TMP14:%.*]] = fdiv contract double 2.000000e+00, [[TMP2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT:    [[FDIV:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[FDIV]]
 ;
   %sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -324,8 +473,15 @@ define double @rsq_amdgcn_f64_nnan_ninf(double %x) {
 define double @rsq_f64_input_known_not_zero(double nofpclass(zero) %x) {
 ; CHECK-LABEL: define double @rsq_f64_input_known_not_zero(
 ; CHECK-SAME: double nofpclass(zero) [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -336,8 +492,15 @@ define double @rsq_f64_input_known_not_zero(double nofpclass(zero) %x) {
 define double @rsq_f64_input_known_not_pinf(double nofpclass(pinf) %x) {
 ; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf(
 ; CHECK-SAME: double nofpclass(pinf) [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -348,8 +511,13 @@ define double @rsq_f64_input_known_not_pinf(double nofpclass(pinf) %x) {
 define double @rsq_f64_input_known_not_pinf_zero(double nofpclass(pinf zero) %x) {
 ; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero(
 ; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -360,8 +528,15 @@ define double @rsq_f64_input_known_not_pinf_zero(double nofpclass(pinf zero) %x)
 define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(double nofpclass(pinf zero) %x) #0 {
 ; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(
 ; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -372,8 +547,15 @@ define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(double nofpclass(pin
 define double @rsq_f64_input_known_not_pinf_zero_daz(double nofpclass(pinf zero) %x) #1 {
 ; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero_daz(
 ; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -384,8 +566,13 @@ define double @rsq_f64_input_known_not_pinf_zero_daz(double nofpclass(pinf zero)
 define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(double nofpclass(pinf zero sub) %x) #1 {
 ; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(
 ; CHECK-SAME: double nofpclass(pinf zero sub) [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -396,8 +583,17 @@ define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(double nofpclass(pin
 define double @rsq_f64_dynamic_denormal(double %x) #0 {
 ; CHECK-LABEL: define double @rsq_f64_dynamic_denormal(
 ; CHECK-SAME: double [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select contract i1 [[TMP4]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP1]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP8]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP10]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -408,8 +604,15 @@ define double @rsq_f64_dynamic_denormal(double %x) #0 {
 define double @rsq_f64_dynamic_denormal_no_pinf(double nofpclass(pinf) %x) #0 {
 ; CHECK-LABEL: define double @rsq_f64_dynamic_denormal_no_pinf(
 ; CHECK-SAME: double nofpclass(pinf) [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -420,8 +623,15 @@ define double @rsq_f64_dynamic_denormal_no_pinf(double nofpclass(pinf) %x) #0 {
 define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(double nofpclass(zero sub) %x) #0 {
 ; CHECK-LABEL: define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(
 ; CHECK-SAME: double nofpclass(zero sub) [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -432,8 +642,15 @@ define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(double nofpclass(zero
 define double @rsq_f64_nnan_sqrt(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan_sqrt(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -444,8 +661,15 @@ define double @rsq_f64_nnan_sqrt(double %x) {
 define double @rsq_f64_nnan_fdiv(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan_fdiv(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -456,8 +680,15 @@ define double @rsq_f64_nnan_fdiv(double %x) {
 define double @rsq_f64_ninf_sqrt(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_ninf_sqrt(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -468,8 +699,13 @@ define double @rsq_f64_ninf_sqrt(double %x) {
 define double @rsq_f64_ninf_fdiv(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_ninf_fdiv(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -480,8 +716,13 @@ define double @rsq_f64_ninf_fdiv(double %x) {
 define double @rsq_f64_ninf_sqrt_nnan_fdiv(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_ninf_sqrt_nnan_fdiv(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -492,8 +733,13 @@ define double @rsq_f64_ninf_sqrt_nnan_fdiv(double %x) {
 define double @rsq_f64_nann_sqrt_ninf_fdiv(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nann_sqrt_ninf_fdiv(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -506,8 +752,15 @@ define double @rsq_f64_assume_nonzero(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[NONZERO:%.*]] = fcmp one double [[X]], 0.000000e+00
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NONZERO]])
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %nonzero = fcmp one double %x, 0.0
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index e34fdd9ae6902..519afd8feba28 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -1,9 +1,16 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,SI-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,SI-GISEL %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG,VI-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL,VI-GISEL %s
+; Test the amdgpu-codegenprepare implementation of rsq formation.
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-IR,SI-SDAG,SI-SDAG-IR %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-IR,SI-GISEL,SI-GISEL-IR %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-IR,VI-SDAG,VI-SDAG-IR %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-IR,VI-GISEL,VI-GISEL-IR %s
+
+; Test codegen implementation.
+; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-CG,SI-SDAG,SI-SDAG-CG %s
+; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-CG,SI-GISEL,SI-GISEL-CG %s
+; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-CG,VI-SDAG,VI-SDAG-CG %s
+; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-CG,VI-GISEL,VI-GISEL-CG %s
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 declare i32 @llvm.amdgcn.readfirstlane(i32)
@@ -13,20 +20,1548 @@ declare double @llvm.amdgcn.sqrt.f64(double)
 declare double @llvm.fabs.f64(double)
 
 define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
-; SI-SDAG-LABEL: s_rsq_f64:
+; SI-SDAG-IR-LABEL: s_rsq_f64:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s1
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_rsq_f64:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_rsq_f64:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s1
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_rsq_f64:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_rsq_f64:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_rsq_f64:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_rsq_f64:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_rsq_f64:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT:    ; return to shader part epilog
+  %rsq = call contract double @llvm.sqrt.f64(double %x)
+  %result = fdiv contract double 1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
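The -IR runs above all check the same post-v_rsq correction sequence: a
v_cmp_class against 0x260 (the class mask for +/-0 and +inf) selects what
feeds the first multiply, and two FMAs apply the 0.375 (0x3fd80000) / 0.5
polynomial. A minimal C sketch of that sequence, reading the constants off
the assembly; names are illustrative, and 1.0/sqrt(x) merely stands in for
the hardware v_rsq_f64 approximation:

    #include <math.h>

    /* Sketch of the rsq sequence the -IR checks expect. */
    static double rsq_model(double x) {
      double y0 = 1.0 / sqrt(x);              /* v_rsq_f64 approximation */
      double xa = (x == INFINITY || x == 0.0) /* v_cmp_class ..., 0x260 */
                      ? y0 : x;               /* v_cndmask_b32 pair */
      double e  = fma(-y0 * xa, y0, 1.0);     /* v_mul_f64 + v_fma_f64 ..., 1.0 */
      return fma(y0 * e, fma(e, 0.375, 0.5), y0); /* final v_mul + two v_fma */
    }

The select keeps the special-case results (rsq of +/-0 and +inf) from being
corrupted by the correction FMAs.
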
+define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_rsq_f64_fabs:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[0:1], |s[0:1]|
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT:    s_and_b32 s2, s1, 0x7fffffff
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_rsq_f64_fabs:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[0:1], |s[0:1]|
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT:    s_and_b32 s2, s1, 0x7fffffff
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s2
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_rsq_f64_fabs:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[0:1], |s[0:1]|
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; VI-SDAG-IR-NEXT:    s_and_b32 s2, s1, 0x7fffffff
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s2
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_rsq_f64_fabs:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[0:1], |s[0:1]|
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT:    s_and_b32 s0, s1, 0x7fffffff
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s0
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_rsq_f64_fabs:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT:    v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_rsq_f64_fabs:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_rsq_f64_fabs:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT:    v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
+; VI-SDAG-CG-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_rsq_f64_fabs:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT:    ; return to shader part epilog
+  %fabs.x = call double @llvm.fabs.f64(double %x)
+  %rsq = call contract double @llvm.sqrt.f64(double %fabs.x)
+  %result = fdiv contract double 1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
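With -amdgpu-codegenprepare-disable-fdiv-expansion, the -CG runs in the two
tests above instead go through the generic f64 sqrt expansion followed by a
full fdiv (v_div_scale/v_div_fmas/v_div_fixup). Inputs below 0x1p-767 (the
v_bfrev_b32 ..., 8 constant) are scaled up by 2^256 before v_rsq and the
square root is rescaled by 2^-128 (0xffffff80) afterwards. A C sketch of
just that scaling, under the assumption that sqrt() stands in for the
rsq-plus-refinement steps shown in the assembly:

    #include <math.h>

    /* Scaling model for the -CG sqrt path; constants read off the checks. */
    static double sqrt_scaled_model(double x) {
      int scaled = x < 0x1.0p-767;             /* v_cmp_lt vs. bfrev(8) value */
      double xs  = ldexp(x, scaled ? 256 : 0); /* v_ldexp_f64 ..., 0x100 */
      double r   = sqrt(xs);                   /* v_rsq_f64 + refinement FMAs */
      return ldexp(r, scaled ? -128 : 0);      /* v_ldexp_f64 ..., 0xffffff80 */
    }

Since sqrt(x * 2^256) = sqrt(x) * 2^128, the trailing ldexp by -128 undoes
the pre-scale exactly.
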
+define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_neg_rsq_f64:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s1
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_neg_rsq_f64:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_neg_rsq_f64:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s1
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_neg_rsq_f64:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_neg_rsq_f64:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT:    s_mov_b32 s2, 0xbff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_neg_rsq_f64:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_neg_rsq_f64:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_neg_rsq_f64:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT:    ; return to shader part epilog
+  %rsq = call contract double @llvm.sqrt.f64(double %x)
+  %result = fdiv contract double -1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_neg_rsq_neg_f64:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[0:1], -s[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT:    s_xor_b32 s2, s1, 0x80000000
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_neg_rsq_neg_f64:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[0:1], -s[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT:    s_xor_b32 s2, s1, 0x80000000
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s2
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_neg_rsq_neg_f64:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[0:1], -s[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; VI-SDAG-IR-NEXT:    s_xor_b32 s2, s1, 0x80000000
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s2
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_neg_rsq_neg_f64:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[0:1], -s[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT:    s_xor_b32 s0, s1, 0x80000000
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s0
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_neg_rsq_neg_f64:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 9
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT:    s_mov_b32 s2, 0xbff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_neg_rsq_neg_f64:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_neg_rsq_neg_f64:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 9
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_neg_rsq_neg_f64:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT:    ; return to shader part epilog
+  %x.neg = fneg double %x
+  %rsq = call contract double @llvm.sqrt.f64(double %x.neg)
+  %result = fdiv contract double -1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
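Per their IR bodies, the fabs/fneg variants only change the sign of the
operand or of the result; the backend folds these into source modifiers
(v_rsq_f64_e64 |s[0:1]| and -s[0:1] above) or into the final FMA.
Continuing the earlier sketch, their semantics in terms of the hypothetical
rsq_model() are simply:

    /* fabs() from <math.h>. */
    static double rsq_fabs_model(double x)    { return rsq_model(fabs(x)); }
    static double neg_rsq_model(double x)     { return -rsq_model(x); }
    static double neg_rsq_neg_model(double x) { return -rsq_model(-x); }
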
+define double @v_rsq_f64(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_fabs(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64_fabs:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v5, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e64 vcc, |v[0:1]|, v5
+; SI-SDAG-IR-NEXT:    v_and_b32_e32 v4, 0x7fffffff, v1
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64_fabs:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e64 vcc, |v[0:1]|, v5
+; SI-GISEL-IR-NEXT:    v_and_b32_e32 v4, 0x7fffffff, v1
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64_fabs:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e64 vcc, |v[0:1]|, v4
+; VI-SDAG-IR-NEXT:    v_and_b32_e32 v5, 0x7fffffff, v1
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64_fabs:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[2:3], |v[0:1]|
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e64 vcc, |v[0:1]|, v4
+; VI-GISEL-IR-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64_fabs:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64_fabs:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64_fabs:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64_fabs:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.x = call double @llvm.fabs.f64(double %x)
+  %sqrt = call contract double @llvm.sqrt.f64(double %fabs.x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_missing_contract0(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract0:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -36,36 +1571,37 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
 ; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
 ; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
 ; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT:    ; return to shader part epilog
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: s_rsq_f64:
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
@@ -83,12 +1619,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -97,20 +1633,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
 ; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT:    ; return to shader part epilog
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: s_rsq_f64:
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract0:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -121,12 +1654,14 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
 ; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -137,18 +1672,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT:    ; return to shader part epilog
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: s_rsq_f64:
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
 ; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -166,7 +1700,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
 ; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -177,36 +1711,26 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
 ; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT:    ; return to shader part epilog
-  %rsq = call contract double @llvm.sqrt.f64(double %x)
-  %result = fdiv contract double 1.0, %rsq
-  %cast = bitcast double %result to <2 x i32>
-  %cast.0 = extractelement <2 x i32> %cast, i32 0
-  %cast.1 = extractelement <2 x i32> %cast, i32 1
-  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
-  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
-  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
-  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
-  ret <2 x i32> %insert.1
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
 }
 
-define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
-; SI-SDAG-LABEL: s_rsq_f64_fabs:
+define double @v_rsq_f64_missing_contract1(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract1:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT:    v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    s_and_b64 s[2:3], s[2:3], exec
-; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
-; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -216,36 +1740,37 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
 ; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
 ; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
 ; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT:    ; return to shader part epilog
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: s_rsq_f64_fabs:
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
@@ -263,12 +1788,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -277,20 +1802,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
 ; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT:    ; return to shader part epilog
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: s_rsq_f64_fabs:
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract1:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT:    v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
-; VI-SDAG-NEXT:    s_and_b64 s[2:3], s[2:3], exec
-; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
-; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -301,14 +1823,16 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
 ; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
 ; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
@@ -317,18 +1841,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT:    ; return to shader part epilog
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: s_rsq_f64_fabs:
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
 ; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
 ; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -346,7 +1869,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
 ; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -357,3719 +1880,843 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
 ; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT:    ; return to shader part epilog
-  %fabs.x = call double @llvm.fabs.f64(double %x)
-  %rsq = call contract double @llvm.sqrt.f64(double %fabs.x)
-  %result = fdiv contract double 1.0, %rsq
-  %cast = bitcast double %result to <2 x i32>
-  %cast.0 = extractelement <2 x i32> %cast, i32 0
-  %cast.1 = extractelement <2 x i32> %cast, i32 1
-  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
-  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
-  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
-  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
-  ret <2 x i32> %insert.1
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv double 1.0, %sqrt
+  ret double %rsq
 }
 
-define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
-; SI-SDAG-LABEL: s_neg_rsq_f64:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT:    s_mov_b32 s2, 0xbff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT:    ; return to shader part epilog
+define double @v_neg_rsq_f64(double %x) {
+; SI-SDAG-IR-LABEL: v_neg_rsq_f64:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: s_neg_rsq_f64:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT:    ; return to shader part epilog
+; SI-GISEL-IR-LABEL: v_neg_rsq_f64:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: s_neg_rsq_f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT:    ; return to shader part epilog
+; VI-SDAG-IR-LABEL: v_neg_rsq_f64:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: s_neg_rsq_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT:    ; return to shader part epilog
-  %rsq = call contract double @llvm.sqrt.f64(double %x)
-  %result = fdiv contract double -1.0, %rsq
-  %cast = bitcast double %result to <2 x i32>
-  %cast.0 = extractelement <2 x i32> %cast, i32 0
-  %cast.1 = extractelement <2 x i32> %cast, i32 1
-  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
-  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
-  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
-  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
-  ret <2 x i32> %insert.1
-}
-
-define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
-; SI-SDAG-LABEL: s_neg_rsq_neg_f64:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 9
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
-; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT:    s_mov_b32 s2, 0xbff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT:    ; return to shader part epilog
+; VI-GISEL-IR-LABEL: v_neg_rsq_f64:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: s_neg_rsq_neg_f64:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT:    ; return to shader part epilog
+; SI-SDAG-CG-LABEL: v_neg_rsq_f64:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: s_neg_rsq_neg_f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 9
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
-; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT:    ; return to shader part epilog
+; SI-GISEL-CG-LABEL: v_neg_rsq_f64:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: s_neg_rsq_neg_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT:    ; return to shader part epilog
-  %x.neg = fneg double %x
-  %rsq = call contract double @llvm.sqrt.f64(double %x.neg)
-  %result = fdiv contract double -1.0, %rsq
-  %cast = bitcast double %result to <2 x i32>
-  %cast.0 = extractelement <2 x i32> %cast, i32 0
-  %cast.1 = extractelement <2 x i32> %cast, i32 1
-  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
-  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
-  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
-  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
-  ret <2 x i32> %insert.1
+; VI-SDAG-CG-LABEL: v_neg_rsq_f64:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_neg_rsq_f64:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double -1.0, %sqrt
+  ret double %rsq
 }
 
-define double @v_rsq_f64(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64:
+define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_rsq_v2f64:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
+; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
+; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
+; SI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v10
+; SI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], v15
+; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[12:13], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[10:11]
+; SI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v7
+; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[18:19], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v11
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_nop 0
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: v_rsq_f64:
+; SI-GISEL-LABEL: v_rsq_v2f64:
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT:    v_mov_b32_e32 v15, 0x260
+; SI-GISEL-NEXT:    v_mov_b32_e32 v18, 0x3ff00000
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 8, v12
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v10
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[12:13], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v18
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[16:17], v[6:7]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v18
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_rsq_f64:
+; VI-SDAG-LABEL: v_rsq_v2f64:
 ; VI-SDAG:       ; %bb.0:
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64_fabs(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_fabs:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_fabs:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_fabs:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_fabs:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %fabs.x = call double @llvm.fabs.f64(double %x)
-  %sqrt = call contract double @llvm.sqrt.f64(double %fabs.x)
-  %rsq = fdiv contract double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64_missing_contract0(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_missing_contract0:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_missing_contract0:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64_missing_contract1(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_missing_contract1:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_missing_contract1:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_neg_rsq_f64(double %x) {
-; SI-SDAG-LABEL: v_neg_rsq_f64:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_f64:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract double -1.0, %sqrt
-  ret double %rsq
-}
-
-define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
-; SI-SDAG-LABEL: v_rsq_v2f64:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
-; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v10
-; SI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[12:13], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
-; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[10:11]
-; SI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v7
-; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[18:19], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v11
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v19
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; SI-SDAG-NEXT:    s_nop 0
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_v2f64:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT:    v_mov_b32_e32 v18, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 8, v12
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v10
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[12:13], v[10:11]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v18
-; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
-; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
-; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[16:17], v[6:7]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v18
-; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_v2f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
-; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[11:12], v[7:8]
-; VI-SDAG-NEXT:    v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[13:14], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
-; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
-; VI-SDAG-NEXT:    v_mul_f64 v[15:16], v[13:14], v[9:10]
-; VI-SDAG-NEXT:    v_mul_f64 v[19:20], v[17:18], v[11:12]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
-; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_v2f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %rsq = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
-  ret <2 x double> %rsq
-}
-
-define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
-; SI-SDAG-LABEL: v_neg_rsq_v2f64:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
-; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v10
-; SI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[12:13], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
-; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[10:11]
-; SI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v7
-; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[18:19], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v11
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v19
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-SDAG-NEXT:    s_nop 0
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_v2f64:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT:    v_mov_b32_e32 v18, 0xbff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 8, v12
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v10
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[12:13], v[10:11]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v18
-; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
-; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[6:7], -1.0, v[2:3], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
-; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[16:17], v[6:7]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v18
-; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_v2f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
-; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], -1.0
-; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], -1.0, v[2:3], -1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[11:12], v[7:8]
-; VI-SDAG-NEXT:    v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
-; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
-; VI-SDAG-NEXT:    v_mul_f64 v[15:16], v[13:14], v[9:10]
-; VI-SDAG-NEXT:    v_mul_f64 v[19:20], v[17:18], v[11:12]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
-; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_v2f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
-; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %rsq = fdiv <2 x double> <double -1.0, double -1.0>, %sqrt
-  ret <2 x double> %rsq
-}
-
-define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
-; SI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v12
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[10:11], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT:    v_mov_b32_e32 v13, 0x260
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[10:11]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[14:15], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[12:13], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
-; SI-GISEL-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v10
-; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[12:13], v[16:17], v[10:11]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v17
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v7
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-GISEL-NEXT:    s_nop 0
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %rsq = fdiv <2 x double> <double -1.0, double poison>, %sqrt
-  ret <2 x double> %rsq
-}
-
-define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
-; SI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
-; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v10
-; SI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[12:13], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
-; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[10:11]
-; SI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v7
-; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[18:19], v[6:7]
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3ff00000
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v11
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s4, v19
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-SDAG-NEXT:    s_nop 0
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v12
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[10:11], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT:    v_mov_b32_e32 v13, 0x260
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[10:11]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[14:15], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[12:13], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
-; SI-GISEL-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v10
-; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[12:13], v[16:17], v[10:11]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v6
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v7
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-GISEL-NEXT:    s_nop 0
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
-; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[11:12], v[7:8]
-; VI-SDAG-NEXT:    v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
-; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
-; VI-SDAG-NEXT:    v_mul_f64 v[15:16], v[13:14], v[9:10]
-; VI-SDAG-NEXT:    v_mul_f64 v[19:20], v[17:18], v[11:12]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
-; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %rsq = fdiv <2 x double> <double -1.0, double 1.0>, %sqrt
-  ret <2 x double> %rsq
-}
-
-define double @v_rsq_f64_fneg_fabs(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 9
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 9
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %fabs = call double @llvm.fabs.f64(double %x)
-  %fneg.fabs = fneg double %fabs
-  %sqrt = call contract double @llvm.sqrt.f64(double %fneg.fabs)
-  %rsq = fdiv contract double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64__afn_sqrt(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64__afn_fdiv(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_fdiv:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_fdiv:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_fdiv:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_fdiv:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64__afn(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_neg_rsq_f64__afn(double %x) {
-; SI-SDAG-LABEL: v_neg_rsq_f64__afn:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_f64__afn:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_f64__afn:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_f64__afn:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn double -1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64__afn_ninf(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_ninf:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_ninf:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_ninf:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_ninf:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn ninf double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64__afn_nnan(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_nnan:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_nnan:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_nnan:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_nnan:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn nnan double 1.0, %sqrt
-  ret double %rsq
-}
-
-define double @v_rsq_f64__afn_nnan_ninf(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_nnan_ninf:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
+; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[11:12], v[7:8]
+; VI-SDAG-NEXT:    v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[13:14], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
+; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
+; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
+; VI-SDAG-NEXT:    v_mul_f64 v[15:16], v[13:14], v[9:10]
+; VI-SDAG-NEXT:    v_mul_f64 v[19:20], v[17:18], v[11:12]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
+; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-LABEL: v_rsq_v2f64:
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
-  ret double %rsq
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
+  ret <2 x double> %rsq
 }
 
-define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
-; SI-SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
+; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
+; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
+; SI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v10
+; SI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], v15
+; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[12:13], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[10:11]
+; SI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v7
+; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[18:19], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v11
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT:    s_nop 0
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-LABEL: v_neg_rsq_v2f64:
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT:    v_mov_b32_e32 v15, 0x260
+; SI-GISEL-NEXT:    v_mov_b32_e32 v18, 0xbff00000
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 8, v12
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v10
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[12:13], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v18
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[6:7], -1.0, v[2:3], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[16:17], v[6:7]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v18
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-LABEL: v_neg_rsq_v2f64:
 ; VI-SDAG:       ; %bb.0:
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
+; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], -1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], -1.0, v[2:3], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[11:12], v[7:8]
+; VI-SDAG-NEXT:    v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
+; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
+; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
+; VI-SDAG-NEXT:    v_mul_f64 v[15:16], v[13:14], v[9:10]
+; VI-SDAG-NEXT:    v_mul_f64 v[19:20], v[17:18], v[11:12]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
+; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-LABEL: v_neg_rsq_v2f64:
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
-  ret double %rsq
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double -1.0, double -1.0>, %sqrt
+  ret <2 x double> %rsq
 }
 
-define double @v_rsq_f64__nnan_ninf(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
@@ -4081,7 +2728,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -4096,12 +2743,12 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
 ; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
 ; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
@@ -4109,53 +2756,89 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v12, 0xffffff80
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v13, 0, v12, vcc
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[10:11], 0.5
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[2:3], v[10:11]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v13
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
+; SI-GISEL-NEXT:    v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[10:11]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[14:15], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[12:13], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v10
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_mul_f64 v[12:13], v[16:17], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v17
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v7
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-GISEL-NEXT:    s_nop 0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
 ; VI-SDAG:       ; %bb.0:
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
@@ -4181,124 +2864,174 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
 ; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
 ; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
 ; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
 ; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT:    v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract nnan ninf double @llvm.sqrt.f64(double %x)
-  %rsq = fdiv contract nnan ninf double 1.0, %sqrt
-  ret double %rsq
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double -1.0, double poison>, %sqrt
+  ret <2 x double> %rsq
 }
 
-define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
-; SI-SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v12, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
+; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
+; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[8:9], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[0:1], v[8:9]
-; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[8:9], 0.5
-; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v12
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
+; SI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v10
+; SI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], v15
+; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[12:13], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[10:11]
+; SI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v7
+; SI-SDAG-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[18:19], v[6:7]
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
+; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v11
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s4, v19
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT:    s_nop 0
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
@@ -4330,36 +3063,53 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v13, 0x260
 ; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
 ; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[4:5]
-; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[10:11]
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-GISEL-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[14:15], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[12:13], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v10
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[12:13], v[16:17], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v6
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v7
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-GISEL-NEXT:    s_nop 0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
 ; VI-SDAG:       ; %bb.0:
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
@@ -4369,57 +3119,66 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[5:6], v[0:1]
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[7:8], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[9:10], -v[0:1], v[5:6], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[11:12], -v[2:3], v[7:8], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[9:10], v[5:6], v[5:6]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[11:12], v[7:8], v[7:8]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[11:12], v[7:8]
+; VI-SDAG-NEXT:    v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
+; VI-SDAG-NEXT:    v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
+; VI-SDAG-NEXT:    v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
+; VI-SDAG-NEXT:    v_mul_f64 v[15:16], v[13:14], v[9:10]
+; VI-SDAG-NEXT:    v_mul_f64 v[19:20], v[17:18], v[11:12]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
+; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0
@@ -4427,10 +3186,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
 ; VI-GISEL-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
 ; VI-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
 ; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-GISEL-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; VI-GISEL-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
@@ -4463,171 +3222,2735 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
 ; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double -1.0, double 1.0>, %sqrt
   ret <2 x double> %rsq
 }
 
-define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
-; SI-SDAG-LABEL: s_rsq_f64_unsafe:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT:    ; return to shader part epilog
+define double @v_rsq_f64_fneg_fabs(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64_fneg_fabs:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_brev_b32 s5, 1
+; SI-SDAG-IR-NEXT:    v_cmp_eq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; SI-SDAG-IR-NEXT:    v_or_b32_e32 v4, 0x80000000, v1
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64_fneg_fabs:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; SI-GISEL-IR-NEXT:    v_cmp_eq_f64_e64 vcc, -|v[0:1]|, 0
+; SI-GISEL-IR-NEXT:    v_or_b32_e32 v4, 0x80000000, v1
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64_fneg_fabs:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_brev_b32 s5, 1
+; VI-SDAG-IR-NEXT:    v_cmp_eq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; VI-SDAG-IR-NEXT:    v_or_b32_e32 v4, 0x80000000, v1
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64_fneg_fabs:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; VI-GISEL-IR-NEXT:    v_cmp_eq_f64_e64 vcc, -|v[0:1]|, 0
+; VI-GISEL-IR-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64_fneg_fabs:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 9
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64_fneg_fabs:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64_fneg_fabs:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 9
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64_fneg_fabs:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %fabs = call double @llvm.fabs.f64(double %x)
+  %fneg.fabs = fneg double %fabs
+  %sqrt = call contract double @llvm.sqrt.f64(double %fneg.fabs)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_sqrt(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_sqrt:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_sqrt:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_sqrt:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_sqrt:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
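+; afn moves to the fdiv here. The *-IR output is identical to the afn-sqrt
+; case above, but on the codegen-only path the divide can now use a
+; v_rcp_f64 with v_fma_f64 refinement steps instead of the
+; v_div_scale/v_div_fmas/v_div_fixup sequence.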
+define double @v_rsq_f64__afn_fdiv(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_fdiv:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_fdiv:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_fdiv:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_fdiv:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_fdiv:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_fdiv:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_fdiv:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_fdiv:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn double 1.0, %sqrt
+  ret double %rsq
+}
+
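+; afn on both the sqrt and the fdiv. Both the *-IR and *-CG outputs match the
+; afn-fdiv case above, so the extra flag on the sqrt makes no observable
+; difference in these checks.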
+define double @v_rsq_f64__afn(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn double 1.0, %sqrt
+  ret double %rsq
+}
+
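+; Same as v_rsq_f64__afn but dividing -1.0 by the sqrt. The IR expansion
+; absorbs the negation into the source modifier of the final v_fma_f64
+; (-v[0:1]) instead of emitting a separate negate.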
+define double @v_neg_rsq_f64__afn(double %x) {
+; SI-SDAG-IR-LABEL: v_neg_rsq_f64__afn:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn double -1.0, %sqrt
+  ret double %rsq
+}
+
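+; Adding ninf lets the IR expansion drop the v_cmp_class_f64 guard entirely:
+; only the bare v_rsq_f64 and the fma correction remain.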
+define double @v_rsq_f64__afn_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_ninf:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_ninf:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_ninf:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_ninf:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn ninf double 1.0, %sqrt
+  ret double %rsq
+}
+
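+; With nnan instead of ninf, the IR expansion narrows its guard from the
+; v_cmp_class_f64 mask test to a plain v_cmp_eq_f64 against +infinity
+; (exponent word 0x7ff00000), presumably the one special case that still has
+; to pass through unmodified.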
+define double @v_rsq_f64__afn_nnan(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x7ff00000
+; SI-SDAG-IR-NEXT:    v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x7ff00000
+; SI-GISEL-IR-NEXT:    v_cmp_eq_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x7ff00000
+; VI-SDAG-IR-NEXT:    v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x7ff00000
+; VI-GISEL-IR-NEXT:    v_cmp_eq_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn nnan double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__nnan_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__nnan_ninf:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__nnan_ninf:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract nnan ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract nnan ninf double 1.0, %sqrt
+  ret double %rsq
+}
+
+define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
+; SI-SDAG-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: s_rsq_f64_unsafe:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT:    ; return to shader part epilog
+; SI-GISEL-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v10, 0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v11, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], 0.5
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[2:3], v[6:7]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: s_rsq_f64_unsafe:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT:    ; return to shader part epilog
-;
-; VI-GISEL-LABEL: s_rsq_f64_unsafe:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT:    ; return to shader part epilog
+; VI-SDAG-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v8, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v9, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[10:11], v[0:1], v[4:5]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], 0.5
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[2:3], v[6:7]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[10:11], v[4:5], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v12, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v12, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v15, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[8:9], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[6:7], v[0:1], v[8:9]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[8:9], 0.5
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v12
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v5, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v12, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v13, 0, v12, vcc
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[10:11], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[10:11]
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v13
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v13, 0x260
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[5:6], v[0:1]
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[7:8], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[9:10], -v[0:1], v[5:6], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[11:12], -v[2:3], v[7:8], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[9:10], v[5:6], v[5:6]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[11:12], v[7:8], v[7:8]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v5, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
+  ret <2 x double> %rsq
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_rsq_f64_unsafe:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s1
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_rsq_f64_unsafe:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_rsq_f64_unsafe:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v3, s1
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_rsq_f64_unsafe:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT:    ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-SDAG-CG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT:    ; return to shader part epilog
   %rsq = call contract afn double @llvm.sqrt.f64(double %x)
   %result = fdiv contract afn double 1.0, %rsq
   %cast = bitcast double %result to <2 x i32>
@@ -4641,145 +5964,213 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 }
 
 define double @v_rsq_f64_unsafe(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_unsafe:
-; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-NEXT:    s_brev_b32 s5, 8
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SI-SDAG-IR-LABEL: v_rsq_f64_unsafe:
+; SI-SDAG-IR:       ; %bb.0:
+; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SI-GISEL-LABEL: v_rsq_f64_unsafe:
-; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT:    v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; SI-GISEL-IR-LABEL: v_rsq_f64_unsafe:
+; SI-GISEL-IR:       ; %bb.0:
+; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_rsq_f64_unsafe:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-NEXT:    s_brev_b32 s5, 8
-; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; VI-SDAG-IR-LABEL: v_rsq_f64_unsafe:
+; VI-SDAG-IR:       ; %bb.0:
+; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-GISEL-LABEL: v_rsq_f64_unsafe:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT:    v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; VI-GISEL-IR-LABEL: v_rsq_f64_unsafe:
+; VI-GISEL-IR:       ; %bb.0:
+; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
+; SI-SDAG-CG:       ; %bb.0:
+; SI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
+; SI-GISEL-CG:       ; %bb.0:
+; SI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
+; VI-SDAG-CG:       ; %bb.0:
+; VI-SDAG-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT:    s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
+; VI-GISEL-CG:       ; %bb.0:
+; VI-GISEL-CG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT:    v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT:    v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call afn contract double @llvm.sqrt.f64(double %x)
   %rsq = fdiv afn contract double 1.0, %sqrt
   ret double %rsq
@@ -4828,39 +6219,22 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_rsq_amdgcn_sqrt_f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_amdgcn_sqrt_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: v_rsq_amdgcn_sqrt_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
   %rsq = fdiv contract double 1.0, %sqrt
   ret double %rsq
@@ -4909,39 +6283,22 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SDAG-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
   %rsq = fdiv contract double -1.0, %sqrt
   ret double %rsq
@@ -4992,41 +6349,23 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; SI-GISEL-NEXT:    ; return to shader part epilog
 ;
-; VI-SDAG-LABEL: s_rsq_amdgcn_sqrt_f64:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT:    ; return to shader part epilog
-;
-; VI-GISEL-LABEL: s_rsq_amdgcn_sqrt_f64:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT:    ; return to shader part epilog
+; VI-LABEL: s_rsq_amdgcn_sqrt_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-NEXT:    ; return to shader part epilog
   %rsq = call contract double @llvm.amdgcn.sqrt.f64(double %x)
   %result = fdiv contract double 1.0, %rsq
   %cast = bitcast double %result to <2 x i32>
@@ -5718,6 +7057,8 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 }
 
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
-; GISEL: {{.*}}
-; SDAG: {{.*}}
+; SI: {{.*}}
+; SI-CG: {{.*}}
+; SI-IR: {{.*}}
+; VI-CG: {{.*}}
+; VI-IR: {{.*}}

>From 25709efd067b0e070be2c6074907616890da645b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 14 Dec 2025 17:37:47 +0100
Subject: [PATCH 02/10] Interested mask bug

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e45d0652a65ef..144ce1a62300f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -641,7 +641,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
   if (MaybeZero)
     Interested = fcZero;
   if (MaybePosInf)
-    Interested = fcPosInf;
+    Interested |= fcPosInf;
 
   if (Interested != fcNone) {
     KnownFPClass KnownSrc = computeKnownFPClass(X, Interested, CtxI);

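Reviewer note on the fix above: with both edge cases possible, the plain
assignment dropped the fcZero bits from the Interested set. A minimal
standalone sketch of the difference, using bit values assumed to mirror
llvm::FPClassTest (the authoritative definitions live in
llvm/ADT/FloatingPointMode.h):

  #include <cassert>
  #include <cstdint>

  // Assumed FPClassTest bit assignments for the classes involved.
  constexpr uint32_t fcNone    = 0x000;
  constexpr uint32_t fcNegZero = 0x020;
  constexpr uint32_t fcPosZero = 0x040;
  constexpr uint32_t fcZero    = fcNegZero | fcPosZero; // 0x060
  constexpr uint32_t fcPosInf  = 0x200;

  int main() {
    bool MaybeZero = true, MaybePosInf = true;

    // Before the fix: the second assignment overwrites the first.
    uint32_t Buggy = fcNone;
    if (MaybeZero)
      Buggy = fcZero;
    if (MaybePosInf)
      Buggy = fcPosInf;
    assert(Buggy == fcPosInf); // the zero query is silently dropped

    // After the fix: the bits accumulate.
    uint32_t Fixed = fcNone;
    if (MaybeZero)
      Fixed = fcZero;
    if (MaybePosInf)
      Fixed |= fcPosInf;
    assert(Fixed == (fcZero | fcPosInf)); // 0x260, i.e. 608
    return 0;
  }

fcZero | fcPosInf is the 0x260 / i32 608 class set that shows up in the
regenerated checks below.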
>From 1e8b31b7bcf5b2b8730440dad791471e31c44a40 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 14 Dec 2025 17:37:59 +0100
Subject: [PATCH 03/10] nnan not useful

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 144ce1a62300f..714a1a47dec2b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -630,11 +630,9 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
   //
   // Fast math flags imply:
   //   sqrt ninf => !isinf(x)
-  //   sqrt nnan => not helpful
   //   fdiv ninf => x != 0, !isinf(x)
-  //   fdiv nnan => x != 0
   bool MaybePosInf = !SqrtFMF.noInfs() && !DivFMF.noInfs();
-  bool MaybeZero = !DivFMF.noInfs() && !DivFMF.noNaNs();
+  bool MaybeZero = !DivFMF.noInfs();
 
   DenormalMode DenormMode;
   FPClassTest Interested = fcNone;

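For the record, the reason nnan cannot stand in for the zero check in
this expansion: the exact result of 1.0 / sqrt(x) for a zero input is an
infinity, never a NaN, so fdiv nnan says nothing about x != 0 while fdiv
ninf still rules it out. A quick host-side sketch in plain C++
(independent of the pass, just IEEE-754 semantics):

  #include <cmath>
  #include <cstdio>

  int main() {
    double X = +0.0;
    double Sqrt = std::sqrt(X); // sqrt(+0.0) == +0.0 per IEEE-754
    double Rsq = 1.0 / Sqrt;    // +inf: well defined, no NaN produced
    std::printf("rsq(0) = %f isinf=%d isnan=%d\n", Rsq,
                std::isinf(Rsq), std::isnan(Rsq));
    // Prints: rsq(0) = inf isinf=1 isnan=0
    return 0;
  }

The same holds for -0.0 (yielding -inf), which is why MaybeZero now
depends only on DivFMF.noInfs().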
>From 067861952a15e4689f940f62cfbc2f7fa4ad87b2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 14 Dec 2025 17:38:18 +0100
Subject: [PATCH 04/10] nnan not useful: regenerate tests

---
 .../AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll  | 10 ++++++----
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 20 ++++++++-----------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index 764b10a7d1987..acdbb4f0d3254 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -44,7 +44,7 @@ define double @rsq_f64_nnan(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan(
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
@@ -63,7 +63,7 @@ define double @neg_rsq_f64_nnan(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_nnan(
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
@@ -662,7 +662,7 @@ define double @rsq_f64_nnan_fdiv(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan_fdiv(
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
@@ -717,8 +717,10 @@ define double @rsq_f64_ninf_sqrt_nnan_fdiv(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_ninf_sqrt_nnan_fdiv(
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP8:%.*]] = select nnan ninf contract i1 [[TMP7]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 519afd8feba28..19658a94d48ac 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -4575,9 +4575,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-SDAG-IR:       ; %bb.0:
 ; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
-; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x7ff00000
-; SI-SDAG-IR-NEXT:    v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -4593,9 +4592,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-GISEL-IR:       ; %bb.0:
 ; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
-; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x7ff00000
-; SI-GISEL-IR-NEXT:    v_cmp_eq_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -4611,9 +4609,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-SDAG-IR:       ; %bb.0:
 ; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
-; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x7ff00000
-; VI-SDAG-IR-NEXT:    v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -4629,9 +4626,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-GISEL-IR:       ; %bb.0:
 ; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
-; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x7ff00000
-; VI-GISEL-IR-NEXT:    v_cmp_eq_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v4
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc

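A note on the new check, since the magic numbers recur throughout the
regenerated output: 608 (0x260) is the FPClassTest mask for positive
infinity together with both zeros, i.e. the special inputs v_rsq_f64
already handles and which the expansion must pass through untouched. A
minimal IR sketch, with illustrative value names:

  ; 0x260 = fcNegZero (0x020) | fcPosZero (0x040) | fcPosInf (0x200)
  %y0      = call double @llvm.amdgcn.rsq.f64(double %x)
  %special = call i1 @llvm.is.fpclass.f64(double %x, i32 608)
  ; feed y0 rather than x into the correction when x is +/-0 or +inf,
  ; so the special result propagates through the later fma steps
  %src     = select i1 %special, double %y0, double %x

The same mask appears as the v_cmp_class_f64 immediate 0x260 in the
updated assembly checks.
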
>From 8c190a11410165fc529eae74cab0e4b9fb699c0e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 14 Dec 2025 17:53:31 +0100
Subject: [PATCH 05/10] Swap order of the Interested fpclass checks

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 714a1a47dec2b..6976869a611aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -636,10 +636,10 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
 
   DenormalMode DenormMode;
   FPClassTest Interested = fcNone;
-  if (MaybeZero)
-    Interested = fcZero;
   if (MaybePosInf)
-    Interested |= fcPosInf;
+    Interested = fcPosInf;
+  if (MaybeZero)
+    Interested |= fcZero;
 
   if (Interested != fcNone) {
     KnownFPClass KnownSrc = computeKnownFPClass(X, Interested, CtxI);

>From 228fe49b35192c048a68634b1f3b11199afa5a1f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 16 Dec 2025 15:48:34 +0100
Subject: [PATCH 06/10] Use X, not Y0, when forming the error term

---
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |   2 +-
 .../AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll  |  18 +-
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 288 +++++++++---------
 3 files changed, 154 insertions(+), 154 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 6976869a611aa..a489f904e0530 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -651,7 +651,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
       MaybeZero = false;
   }
 
-  Value *SpecialOrRsq = Y0;
+  Value *SpecialOrRsq = X;
   if (MaybeZero || MaybePosInf) {
     Value *Cond;
     if (MaybePosInf && MaybeZero) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index acdbb4f0d3254..e68f449cec980 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -84,7 +84,7 @@ define double @rsq_f64_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -101,7 +101,7 @@ define double @neg_rsq_f64_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -119,7 +119,7 @@ define double @rsq_f64_nnan_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -136,7 +136,7 @@ define double @neg_rsq_f64_nnan_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -173,7 +173,7 @@ define double @rsq_f64_fdiv_nnan_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -513,7 +513,7 @@ define double @rsq_f64_input_known_not_pinf_zero(double nofpclass(pinf zero) %x)
 ; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -568,7 +568,7 @@ define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(double nofpclass(pin
 ; CHECK-SAME: double nofpclass(pinf zero sub) [[X:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -701,7 +701,7 @@ define double @rsq_f64_ninf_fdiv(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
@@ -737,7 +737,7 @@ define double @rsq_f64_nann_sqrt_ninf_fdiv(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 19658a94d48ac..50655338eaf2e 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -4377,53 +4377,53 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-SDAG-IR-LABEL: v_rsq_f64__afn_ninf:
 ; SI-SDAG-IR:       ; %bb.0:
 ; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_rsq_f64__afn_ninf:
 ; SI-GISEL-IR:       ; %bb.0:
 ; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_rsq_f64__afn_ninf:
 ; VI-SDAG-IR:       ; %bb.0:
 ; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_rsq_f64__afn_ninf:
 ; VI-GISEL-IR:       ; %bb.0:
 ; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
@@ -4787,53 +4787,53 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; SI-SDAG-IR:       ; %bb.0:
 ; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; SI-GISEL-IR:       ; %bb.0:
 ; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; VI-SDAG-IR:       ; %bb.0:
 ; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
 ; VI-GISEL-IR:       ; %bb.0:
 ; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
@@ -4984,53 +4984,53 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; SI-SDAG-IR:       ; %bb.0:
 ; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; SI-GISEL-IR:       ; %bb.0:
 ; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; VI-SDAG-IR:       ; %bb.0:
 ; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
 ; VI-GISEL-IR:       ; %bb.0:
 ; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -5183,53 +5183,53 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-IR-LABEL: v_rsq_f64__nnan_ninf:
 ; SI-SDAG-IR:       ; %bb.0:
 ; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_rsq_f64__nnan_ninf:
 ; SI-GISEL-IR:       ; %bb.0:
 ; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_rsq_f64__nnan_ninf:
 ; VI-SDAG-IR:       ; %bb.0:
 ; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_rsq_f64__nnan_ninf:
 ; VI-GISEL-IR:       ; %bb.0:
 ; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[0:1]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_rsq_f64__nnan_ninf:
@@ -5404,77 +5404,77 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-SDAG-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; SI-SDAG-IR:       ; %bb.0:
 ; SI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
-; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[4:5]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[6:7]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; SI-GISEL-IR:       ; %bb.0:
 ; SI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
-; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v10, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v11, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], 0.5
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[2:3], v[6:7]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[8:9], v[4:5], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[10:11], 0.5
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], 0.5
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[4:5]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[2:3], v[6:7]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; VI-SDAG-IR:       ; %bb.0:
 ; VI-SDAG-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
-; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[4:5]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[6:7]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
 ; VI-GISEL-IR:       ; %bb.0:
 ; VI-GISEL-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
-; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v8, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v9, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[4:5], -v[0:1], v[0:1]
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], -v[2:3], v[2:3]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[10:11], v[0:1], v[4:5]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], 0.5
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[2:3], v[6:7]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[10:11], v[4:5], v[0:1]
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[10:11], v[4:5], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], 0.5
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], 0.5
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[10:11], v[0:1], v[4:5]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[2:3], v[6:7]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:

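For context on why the first multiply must use x rather than y0: the
two-fma refinement is a truncated binomial series in the residual
e = 1 - x*y0^2, where y0 is the raw v_rsq_f64 estimate. A sketch of
the algebra, written in the fma style of the source comment and
assuming y0 is close to x^(-1/2) so that e is small:

  e         = fma(-(x*y0), y0, 1.0)           // = 1 - x*y0^2
  1/sqrt(x) = y0 * (x*y0^2)^(-1/2)
            = y0 * (1 - e)^(-1/2)
            = y0 * (1 + e/2 + (3/8)*e^2 + O(e^3))
            = y0 + (y0*e)*(0.5 + 0.375*e) + O(e^3)

Multiplying -y0 by y0 instead, as before this patch, forms 1 - y0^3,
which is not a small residual, so the correction computed the wrong
quantity.
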
>From 411f210b1b1821dbc0962b4e6cbedf0da35a7d48 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 22 Dec 2025 13:27:31 +0100
Subject: [PATCH 07/10] Fixup handling of the negated rsq result

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index a489f904e0530..0196021486164 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -684,14 +684,13 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
 
   // Could be fmuladd, but isFMAFasterThanFMulAndFAdd is always true for f64.
   Value *E = Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));
-  Value *Y0E = Builder.CreateFMul(Y0, E);
+
+  Value *Y0E = Builder.CreateFMul(IsNegative ? NegY0 : Y0, E);
 
   Value *EFMA = Builder.CreateFMA(E, ConstantFP::get(X->getType(), 0.375),
                                   ConstantFP::get(X->getType(), 0.5));
-  if (IsNegative)
-    EFMA = Builder.CreateFNeg(EFMA);
 
-  return Builder.CreateFMA(Y0E, EFMA, Y0);
+  return Builder.CreateFMA(Y0E, EFMA, IsNegative ? NegY0 : Y0);
 }
 
 bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,

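Note that this is a functional fix, not just a cleanup. With
f = fma(e, 0.375, 0.5), the removed sequence evaluated

  fma(y0*e, -f, y0)   =  y0 - y0*e*f

which is not the negation of the rsq expansion, while the new sequence
evaluates

  fma(-y0*e, f, -y0)  =  -(y0 + y0*e*f)  =  -rsq(x)

the term-by-term negation of the positive case, and the standalone
fneg of the fma coefficient is no longer needed.
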
>From f7ce96a7b2698c18f8eb4e185c66e91a7b0f3e00 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 22 Dec 2025 13:35:41 +0100
Subject: [PATCH 08/10] Commute fmul operands to canonical order

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 0196021486164..9513ed946f8e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -680,12 +680,12 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
   }
 
   Value *NegY0 = Builder.CreateFNeg(Y0);
-  Value *NegXY0 = Builder.CreateFMul(NegY0, SpecialOrRsq);
+  Value *NegXY0 = Builder.CreateFMul(SpecialOrRsq, NegY0);
 
   // Could be fmuladd, but isFMAFasterThanFMulAndFAdd is always true for f64.
   Value *E = Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));
 
-  Value *Y0E = Builder.CreateFMul(IsNegative ? NegY0 : Y0, E);
+  Value *Y0E = Builder.CreateFMul(E, IsNegative ? NegY0 : Y0);
 
   Value *EFMA = Builder.CreateFMA(E, ConstantFP::get(X->getType(), 0.375),
                                   ConstantFP::get(X->getType(), 0.5));

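This commute does not change the computed value; it only swaps the
fmul operand order, presumably so the emitted IR matches the commuted
form captured by the regenerated FileCheck lines in the final patch. A
minimal before/after in IR, with illustrative names:

  ; before:  %t5 = fmul contract double %neg.y0, %sel
  ; after:   %t5 = fmul contract double %sel, %neg.y0
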
>From 59bb6803b4de08495b5455f75721cfbd7fa35e57 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 22 Dec 2025 13:38:37 +0100
Subject: [PATCH 09/10] Add comment for the negated rsq expansion

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 9513ed946f8e4..0a262b41ab330 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -620,6 +620,11 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
   //   double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0, 1.0);
   //   return MATH_MAD(y0*e, MATH_MAD(e, 0.375, 0.5), y0);
   //
+  // -rsq(x):
+  //   double y0 = BUILTIN_AMDGPU_RSQRT_F64(x);
+  //   double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0, 1.0);
+  //   return MATH_MAD(-y0*e, MATH_MAD(e, 0.375, 0.5), -y0);
+  //
   // The rsq instruction handles the special cases correctly. We need to check
   // for the edge case conditions to ensure the special case propagates through
   // the later instructions.

>From 2227f62dd21a43841d0564007a530505f8792c59 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 22 Dec 2025 13:40:09 +0100
Subject: [PATCH 10/10] Regenerate checks

---
 .../AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll  | 147 ++++---
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 376 +++++++++---------
 2 files changed, 259 insertions(+), 264 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index e68f449cec980..1fecc2b613c4c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -8,9 +8,9 @@ define double @rsq_f64(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -27,12 +27,11 @@ define double @neg_rsq_f64(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
-; CHECK-NEXT:    [[TMP9:%.*]] = fneg contract double [[TMP8]]
-; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP4]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -47,9 +46,9 @@ define double @rsq_f64_nnan(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -66,12 +65,11 @@ define double @neg_rsq_f64_nnan(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP6]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
-; CHECK-NEXT:    [[TMP9:%.*]] = fneg nnan contract double [[TMP8]]
-; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP4]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -84,9 +82,9 @@ define double @rsq_f64_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -101,12 +99,11 @@ define double @neg_rsq_f64_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
-; CHECK-NEXT:    [[TMP7:%.*]] = fneg ninf contract double [[TMP6]]
-; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP2]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -119,9 +116,9 @@ define double @rsq_f64_nnan_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -136,12 +133,11 @@ define double @neg_rsq_f64_nnan_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
-; CHECK-NEXT:    [[TMP7:%.*]] = fneg nnan ninf contract double [[TMP6]]
-; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP2]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -156,9 +152,9 @@ define double @rsq_f64_sqrt_nnan_ninf(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan ninf contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -173,9 +169,9 @@ define double @rsq_f64_fdiv_nnan_ninf(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -197,18 +193,18 @@ define <2 x double> @rsq_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
 ; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP10]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
 ; CHECK-NEXT:    [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fneg contract double [[TMP14]]
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP16]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP19]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[TMP22:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP14]])
 ; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
@@ -232,18 +228,18 @@ define <2 x double> @neg_rsq_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
 ; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP10]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
 ; CHECK-NEXT:    [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fneg contract double [[TMP14]]
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP16]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP19]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[TMP22:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP14]])
 ; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
@@ -267,21 +263,20 @@ define <2 x double> @mixed_sign_rsq_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
 ; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP10]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
 ; CHECK-NEXT:    [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fneg contract double [[TMP14]]
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul contract double [[TMP16]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul contract double [[TMP19]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
-; CHECK-NEXT:    [[TMP22:%.*]] = fneg contract double [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP22]], double [[TMP14]])
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP17]])
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
 ; CHECK-NEXT:    [[FDIV:%.*]] = insertelement <2 x double> [[TMP24]], double [[TMP23]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[FDIV]]
@@ -303,9 +298,9 @@ define <2 x double> @rsq_some_elements_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
 ; CHECK-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg contract double [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul contract double [[TMP10]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = fdiv contract double 2.000000e+00, [[TMP2]]
@@ -477,9 +472,9 @@ define double @rsq_f64_input_known_not_zero(double nofpclass(zero) %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -496,9 +491,9 @@ define double @rsq_f64_input_known_not_pinf(double nofpclass(pinf) %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -513,9 +508,9 @@ define double @rsq_f64_input_known_not_pinf_zero(double nofpclass(pinf zero) %x)
 ; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -532,9 +527,9 @@ define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(double nofpclass(pin
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -551,9 +546,9 @@ define double @rsq_f64_input_known_not_pinf_zero_daz(double nofpclass(pinf zero)
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -568,9 +563,9 @@ define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(double nofpclass(pin
 ; CHECK-SAME: double nofpclass(pinf zero sub) [[X:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -589,9 +584,9 @@ define double @rsq_f64_dynamic_denormal(double %x) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select contract i1 [[TMP4]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP1]], [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP8]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP10]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -608,9 +603,9 @@ define double @rsq_f64_dynamic_denormal_no_pinf(double nofpclass(pinf) %x) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -627,9 +622,9 @@ define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(double nofpclass(zero
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -646,9 +641,9 @@ define double @rsq_f64_nnan_sqrt(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -665,9 +660,9 @@ define double @rsq_f64_nnan_fdiv(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -684,9 +679,9 @@ define double @rsq_f64_ninf_sqrt(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ninf contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP3:%.*]] = select ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul ninf contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -701,9 +696,9 @@ define double @rsq_f64_ninf_fdiv(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -720,9 +715,9 @@ define double @rsq_f64_ninf_sqrt_nnan_fdiv(double %x) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP8:%.*]] = select nnan ninf contract i1 [[TMP7]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP8]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP8]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -737,9 +732,9 @@ define double @rsq_f64_nann_sqrt_ninf_fdiv(double %x) {
 ; CHECK-SAME: double [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[X]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
@@ -758,9 +753,9 @@ define double @rsq_f64_assume_nonzero(double %x) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
 ; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
 ; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 50655338eaf2e..43bfe73515adb 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -29,11 +29,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -49,11 +49,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -71,9 +71,9 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -89,11 +89,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -290,11 +290,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -311,11 +311,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s2
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -334,9 +334,9 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -353,11 +353,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -554,13 +554,13 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; SI-SDAG-IR-NEXT:    ; return to shader part epilog
@@ -574,13 +574,13 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; SI-GISEL-IR-NEXT:    ; return to shader part epilog
@@ -596,11 +596,11 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; VI-SDAG-IR-NEXT:    ; return to shader part epilog
@@ -614,13 +614,13 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; VI-GISEL-IR-NEXT:    ; return to shader part epilog
@@ -815,13 +815,13 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; SI-SDAG-IR-NEXT:    ; return to shader part epilog
@@ -836,13 +836,13 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s2
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; SI-GISEL-IR-NEXT:    ; return to shader part epilog
@@ -859,11 +859,11 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; VI-SDAG-IR-NEXT:    ; return to shader part epilog
@@ -878,13 +878,13 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s1, v1
 ; VI-GISEL-IR-NEXT:    ; return to shader part epilog
@@ -1079,10 +1079,10 @@ define double @v_rsq_f64(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1096,10 +1096,10 @@ define double @v_rsq_f64(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1114,9 +1114,9 @@ define double @v_rsq_f64(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1131,9 +1131,9 @@ define double @v_rsq_f64(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1316,11 +1316,11 @@ define double @v_rsq_f64_fabs(double %x) {
 ; SI-SDAG-IR-NEXT:    v_and_b32_e32 v4, 0x7fffffff, v1
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1334,11 +1334,11 @@ define double @v_rsq_f64_fabs(double %x) {
 ; SI-GISEL-IR-NEXT:    v_and_b32_e32 v4, 0x7fffffff, v1
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1354,9 +1354,9 @@ define double @v_rsq_f64_fabs(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1372,9 +1372,9 @@ define double @v_rsq_f64_fabs(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -1896,12 +1896,12 @@ define double @v_neg_rsq_f64(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_neg_rsq_f64:
@@ -1913,12 +1913,12 @@ define double @v_neg_rsq_f64(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_neg_rsq_f64:
@@ -1931,11 +1931,11 @@ define double @v_neg_rsq_f64(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_neg_rsq_f64:
@@ -1948,11 +1948,11 @@ define double @v_neg_rsq_f64(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_neg_rsq_f64:
@@ -3263,11 +3263,11 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-SDAG-IR-NEXT:    v_or_b32_e32 v4, 0x80000000, v1
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3280,11 +3280,11 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-GISEL-IR-NEXT:    v_or_b32_e32 v4, 0x80000000, v1
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3301,9 +3301,9 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3318,9 +3318,9 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3505,10 +3505,10 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3522,10 +3522,10 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3540,9 +3540,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3557,9 +3557,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3742,10 +3742,10 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3759,10 +3759,10 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3777,9 +3777,9 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3794,9 +3794,9 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3955,10 +3955,10 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3972,10 +3972,10 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -3990,9 +3990,9 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4007,9 +4007,9 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4168,12 +4168,12 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn:
@@ -4185,12 +4185,12 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn:
@@ -4203,11 +4203,11 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn:
@@ -4220,11 +4220,11 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
@@ -4380,9 +4380,9 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4393,9 +4393,9 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4406,9 +4406,9 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4419,9 +4419,9 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4580,10 +4580,10 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4597,10 +4597,10 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4615,9 +4615,9 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4632,9 +4632,9 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4790,9 +4790,9 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4803,9 +4803,9 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4816,9 +4816,9 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4829,9 +4829,9 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -4987,11 +4987,11 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -5000,11 +5000,11 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
-; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -5013,11 +5013,11 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -5026,11 +5026,11 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
-; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -5186,9 +5186,9 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -5199,9 +5199,9 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -5212,9 +5212,9 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -5225,9 +5225,9 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -5408,13 +5408,13 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[4:5]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[6:7]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[4:5], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[4:5]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[6:7]
@@ -5427,13 +5427,13 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v10, 0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v11, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[4:5]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[6:7]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[8:9], v[4:5], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[10:11], 0.5
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[2:3], v[6:7]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[4:5]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[2:3], v[6:7]
@@ -5446,13 +5446,13 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-SDAG-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[4:5]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[6:7]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[4:5], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[8:9], v[0:1], v[4:5]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[10:11], v[2:3], v[6:7]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[4:5]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[6:7]
@@ -5465,13 +5465,13 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-IR-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v8, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v9, 0x3fd80000
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[4:5], v[0:1]
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[4:5]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[6:7]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 1.0
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[10:11], v[4:5], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[10:11], v[0:1], v[4:5]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], 0.5
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[12:13], v[2:3], v[6:7]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[10:11], v[0:1], v[4:5]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[12:13], v[2:3], v[6:7]
@@ -5733,11 +5733,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ; SI-SDAG-IR-NEXT:    v_mov_b32_e32 v2, s0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s0, 0
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
 ; SI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -5753,11 +5753,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
 ; SI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -5775,9 +5775,9 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s1, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
 ; VI-SDAG-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -5793,11 +5793,11 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, s1
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[2:3], v[2:3], -v[0:1]
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
 ; VI-GISEL-IR-NEXT:    v_readfirstlane_b32 s0, v0
@@ -5969,10 +5969,10 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; SI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -5986,10 +5986,10 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; SI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; SI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; SI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -6004,9 +6004,9 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; VI-SDAG-IR-NEXT:    s_mov_b32 s5, 0x3fd80000
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-SDAG-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
 ; VI-SDAG-IR-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-SDAG-IR-NEXT:    s_setpc_b64 s[30:31]
@@ -6021,9 +6021,9 @@ define double @v_rsq_f64_unsafe(double %x) {
 ; VI-GISEL-IR-NEXT:    v_mov_b32_e32 v5, 0x3fd80000
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-IR-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[0:1], v[0:1], -v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
-; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT:    v_mul_f64 v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
 ; VI-GISEL-IR-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
 ; VI-GISEL-IR-NEXT:    s_setpc_b64 s[30:31]


