[llvm-branch-commits] [llvm] AMDGPU: Introduce f64 rsq pattern in AMDGPUCodeGenPrepare (PR #172053)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 12 09:40:41 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/172053
Handle this here instead of in DAGCombine, mostly because the f32
case is already handled here due to its dependency on !fpmath. We can
also take advantage of computeKnownFPClass.
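
For reference, the expansion follows the device-library-style rsq sequence
quoted in the code comment below. A minimal, runnable C++ sketch of the math
(illustrative only; the hardware v_rsq_f64 estimate is approximated here with
1.0 / sqrt(x) purely so the sketch executes, and the names are invented):

#include <cmath>
#include <limits>

// Reference model of the emitted f64 rsq expansion.
double rsqF64Expanded(double x) {
  // Stand-in for the v_rsq_f64 estimate (llvm.amdgcn.rsq.f64).
  double y0 = 1.0 / std::sqrt(x);
  // If x is +inf or 0.0, feed y0 back in place of x so the estimate's
  // special-case result propagates through the correction arithmetic.
  double xs =
      (x == std::numeric_limits<double>::infinity() || x == 0.0) ? y0 : x;
  double e = std::fma(-y0 * xs, y0, 1.0);               // residual 1 - x*y0*y0
  return std::fma(y0 * e, std::fma(e, 0.375, 0.5), y0); // ~1 ulp result
}

The select implementing the xs substitution is exactly what the fast-math-flag
and computeKnownFPClass checks in the patch try to elide.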
From 43df284f914ff150be2edf9cbdeda45a29f2d32d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 12 Dec 2025 15:44:58 +0100
Subject: [PATCH] AMDGPU: Introduce f64 rsq pattern in AMDGPUCodeGenPrepare
Handle this here instead of in DAGCombine, mostly because the f32
case is already handled here due to its dependency on !fpmath. We can
also take advantage of computeKnownFPClass.
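
As an illustrative restatement of when the special-case guard can be dropped
(hypothetical names; the real logic lives in emitRsqF64 and also folds in the
computeKnownFPClass results):

struct Flags { bool NInf, NNan; };

// Returns true if the +inf/zero select is still needed. The Known*
// parameters summarize the computeKnownFPClass query on x.
bool needsSpecialCaseGuard(Flags SqrtFMF, Flags DivFMF,
                           bool KnownNeverPosInf, bool KnownNeverZero) {
  // sqrt ninf or fdiv ninf => x cannot be +inf.
  bool MaybePosInf = !SqrtFMF.NInf && !DivFMF.NInf && !KnownNeverPosInf;
  // fdiv ninf or fdiv nnan => x cannot be a (logical) zero.
  bool MaybeZero = !DivFMF.NInf && !DivFMF.NNan && !KnownNeverZero;
  return MaybePosInf || MaybeZero;
}

When both conditions survive and the denormal mode is statically known, the
guard collapses to a single llvm.is.fpclass with mask fcPosInf | fcZero
(0x260, the i32 608 seen in the tests), adding fcSubnormal when inputs flush
to zero; under a dynamic denormal mode it is emitted as two fcmps instead.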
---
.../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 141 +-
.../AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll | 365 +-
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 10091 +++++++++-------
3 files changed, 6154 insertions(+), 4443 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 71ea9ef6fc050..e45d0652a65ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -236,6 +236,9 @@ class AMDGPUCodeGenPrepareImpl
FastMathFlags FMF) const;
Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
FastMathFlags FMF) const;
+ Value *emitRsqF64(IRBuilder<> &Builder, Value *X, FastMathFlags SqrtFMF,
+ FastMathFlags DivFMF, const Instruction *CtxI,
+ bool IsNegative) const;
bool tryNarrowMathIfNoOverflow(Instruction *I);
@@ -605,6 +608,94 @@ static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
return Builder.CreateFMul(Rsq, OutputScaleFactor);
}
+/// Emit inverse sqrt expansion for f64 with a correction sequence on top of
+/// v_rsq_f64. This should give a 1ulp result.
+Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
+ FastMathFlags SqrtFMF,
+ FastMathFlags DivFMF,
+ const Instruction *CtxI,
+ bool IsNegative) const {
+ // rsq(x):
+ // double y0 = BUILTIN_AMDGPU_RSQRT_F64(x);
+ // double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0, 1.0);
+ // return MATH_MAD(y0*e, MATH_MAD(e, 0.375, 0.5), y0);
+ //
+ // The rsq instruction handles the special cases correctly. We need to check
+ // for the edge case conditions to ensure the special case propagates through
+ // the later instructions.
+
+ Value *Y0 = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, X);
+
+ // Try to elide the edge case check.
+ //
+ // Fast math flags imply:
+ // sqrt ninf => !isinf(x)
+ // sqrt nnan => not helpful
+ // fdiv ninf => x != 0, !isinf(x)
+ // fdiv nnan => x != 0
+ bool MaybePosInf = !SqrtFMF.noInfs() && !DivFMF.noInfs();
+ bool MaybeZero = !DivFMF.noInfs() && !DivFMF.noNaNs();
+
+ DenormalMode DenormMode;
+ FPClassTest Interested = fcNone;
+  if (MaybeZero)
+    Interested |= fcZero;
+  if (MaybePosInf)
+    Interested |= fcPosInf;
+
+ if (Interested != fcNone) {
+ KnownFPClass KnownSrc = computeKnownFPClass(X, Interested, CtxI);
+ if (KnownSrc.isKnownNeverPosInfinity())
+ MaybePosInf = false;
+
+ DenormMode = F.getDenormalMode(X->getType()->getFltSemantics());
+ if (KnownSrc.isKnownNeverLogicalZero(DenormMode))
+ MaybeZero = false;
+ }
+
+ Value *SpecialOrRsq = Y0;
+ if (MaybeZero || MaybePosInf) {
+ Value *Cond;
+ if (MaybePosInf && MaybeZero) {
+ if (DenormMode.Input != DenormalMode::DenormalModeKind::Dynamic) {
+ FPClassTest TestMask = fcPosInf | fcZero;
+ if (DenormMode.inputsAreZero())
+ TestMask |= fcSubnormal;
+
+ Cond = Builder.createIsFPClass(X, TestMask);
+ } else {
+ // Avoid using llvm.is.fpclass for dynamic denormal mode, since it
+ // doesn't respect the floating-point environment.
+ Value *IsZero =
+ Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+ Value *IsInf =
+ Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+ Cond = Builder.CreateOr(IsZero, IsInf);
+ }
+ } else if (MaybeZero) {
+ Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+ } else {
+ Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+ }
+
+ SpecialOrRsq = Builder.CreateSelect(Cond, Y0, X);
+ }
+
+ Value *NegY0 = Builder.CreateFNeg(Y0);
+ Value *NegXY0 = Builder.CreateFMul(NegY0, SpecialOrRsq);
+
+ // Could be fmuladd, but isFMAFasterThanFMulAndFAdd is always true for f64.
+ Value *E = Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));
+ Value *Y0E = Builder.CreateFMul(Y0, E);
+
+ Value *EFMA = Builder.CreateFMA(E, ConstantFP::get(X->getType(), 0.375),
+ ConstantFP::get(X->getType(), 0.5));
+ if (IsNegative)
+ EFMA = Builder.CreateFNeg(EFMA);
+
+ return Builder.CreateFMA(Y0E, EFMA, Y0);
+}
+
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
FastMathFlags DivFMF,
FastMathFlags SqrtFMF) const {
@@ -612,8 +703,22 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
if (!DivFMF.allowContract() || !SqrtFMF.allowContract())
return false;
- // v_rsq_f32 gives 1ulp
- return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+ Type *EltTy = SqrtOp->getType()->getScalarType();
+ switch (EltTy->getTypeID()) {
+ case Type::FloatTyID:
+ // v_rsq_f32 gives 1ulp
+ // Separate correctly rounded fdiv + sqrt give ~1.81 ulp.
+
+ // FIXME: rsq formation should not depend on approx func or the fpmath
+ // accuracy. This strictly improves precision.
+ return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+ case Type::DoubleTyID:
+ return true;
+ default:
+ return false;
+ }
+
+ llvm_unreachable("covered switch");
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
@@ -629,8 +734,6 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
if (!CLHS)
return nullptr;
- assert(Den->getType()->isFloatTy());
-
bool IsNegative = false;
// TODO: Handle other numerator values with arcp.
@@ -639,14 +742,20 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
IRBuilder<>::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(DivFMF | SqrtFMF);
- if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
- canIgnoreDenormalInput(Den, CtxI)) {
- Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
- // -1.0 / sqrt(x) -> fneg(rsq(x))
- return IsNegative ? Builder.CreateFNeg(Result) : Result;
+ if (Den->getType()->isFloatTy()) {
+ if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
+ canIgnoreDenormalInput(Den, CtxI)) {
+ Value *Result =
+ Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
+ // -1.0 / sqrt(x) -> fneg(rsq(x))
+ return IsNegative ? Builder.CreateFNeg(Result) : Result;
+ }
+
+ return emitRsqIEEE1ULP(Builder, Den, IsNegative);
}
- return emitRsqIEEE1ULP(Builder, Den, IsNegative);
+ if (Den->getType()->isDoubleTy())
+ return emitRsqF64(Builder, Den, SqrtFMF, DivFMF, CtxI, IsNegative);
}
return nullptr;
@@ -758,6 +867,9 @@ Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
return Rsq;
}
+ if (!Num->getType()->isFloatTy())
+ return nullptr;
+
Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
if (Rcp)
return Rcp;
@@ -793,7 +905,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
return false;
Type *Ty = FDiv.getType()->getScalarType();
- if (!Ty->isFloatTy())
+ const bool IsFloat = Ty->isFloatTy();
+ if (!IsFloat && !Ty->isDoubleTy())
return false;
// The f64 rcp/rsq approximations are pretty inaccurate. We can do an
@@ -818,6 +931,10 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
RsqOp = SqrtOp->getOperand(0);
}
+ // rcp path not yet implemented for f64.
+ if (!IsFloat && !RsqOp)
+ return false;
+
// Inaccurate rcp is allowed with afn.
//
// Defer to codegen to handle this.
@@ -832,7 +949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
return false;
// Defer the correct implementations to codegen.
- if (ReqdAccuracy < 1.0f)
+ if (IsFloat && ReqdAccuracy < 1.0f)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index b97cd91f2ab32..764b10a7d1987 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -4,8 +4,15 @@
define double @rsq_f64(double %x) {
; CHECK-LABEL: define double @rsq_f64(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -16,8 +23,16 @@ define double @rsq_f64(double %x) {
define double @neg_rsq_f64(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP9:%.*]] = fneg contract double [[TMP8]]
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -28,8 +43,15 @@ define double @neg_rsq_f64(double %x) {
define double @rsq_f64_nnan(double %x) {
; CHECK-LABEL: define double @rsq_f64_nnan(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -40,8 +62,16 @@ define double @rsq_f64_nnan(double %x) {
define double @neg_rsq_f64_nnan(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64_nnan(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP9:%.*]] = fneg nnan contract double [[TMP8]]
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -52,8 +82,13 @@ define double @neg_rsq_f64_nnan(double %x) {
define double @rsq_f64_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -64,8 +99,14 @@ define double @rsq_f64_ninf(double %x) {
define double @neg_rsq_f64_ninf(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP7:%.*]] = fneg ninf contract double [[TMP6]]
+; CHECK-NEXT: [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -76,8 +117,13 @@ define double @neg_rsq_f64_ninf(double %x) {
define double @rsq_f64_nnan_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -88,8 +134,14 @@ define double @rsq_f64_nnan_ninf(double %x) {
define double @neg_rsq_f64_nnan_ninf(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP7:%.*]] = fneg nnan ninf contract double [[TMP6]]
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -100,8 +152,15 @@ define double @neg_rsq_f64_nnan_ninf(double %x) {
define double @rsq_f64_sqrt_nnan_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_sqrt_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -112,8 +171,13 @@ define double @rsq_f64_sqrt_nnan_ninf(double %x) {
define double @rsq_f64_fdiv_nnan_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_fdiv_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -125,7 +189,30 @@ define <2 x double> @rsq_v2f64(<2 x double> %x) {
; CHECK-LABEL: define <2 x double> @rsq_v2f64(
; CHECK-SAME: <2 x double> [[X:%.*]]) {
; CHECK-NEXT: [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract <2 x double> splat (double 1.000000e+00), [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT: [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
+; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
+; CHECK-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = fneg contract double [[TMP14]]
+; CHECK-NEXT: [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP22:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP14]])
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT: [[FDIV:%.*]] = insertelement <2 x double> [[TMP23]], double [[TMP22]], i64 1
; CHECK-NEXT: ret <2 x double> [[FDIV]]
;
%sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -137,7 +224,30 @@ define <2 x double> @neg_rsq_v2f64(<2 x double> %x) {
; CHECK-LABEL: define <2 x double> @neg_rsq_v2f64(
; CHECK-SAME: <2 x double> [[X:%.*]]) {
; CHECK-NEXT: [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract <2 x double> splat (double -1.000000e+00), [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT: [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
+; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
+; CHECK-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = fneg contract double [[TMP14]]
+; CHECK-NEXT: [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP22:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP21]], double [[TMP14]])
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT: [[FDIV:%.*]] = insertelement <2 x double> [[TMP23]], double [[TMP22]], i64 1
; CHECK-NEXT: ret <2 x double> [[FDIV]]
;
%sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -149,7 +259,31 @@ define <2 x double> @mixed_sign_rsq_v2f64(<2 x double> %x) {
; CHECK-LABEL: define <2 x double> @mixed_sign_rsq_v2f64(
; CHECK-SAME: <2 x double> [[X:%.*]]) {
; CHECK-NEXT: [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract <2 x double> <double 1.000000e+00, double -1.000000e+00>, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT: [[TMP14:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP4]])
+; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP4]], i32 608)
+; CHECK-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], double [[TMP14]], double [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = fneg contract double [[TMP14]]
+; CHECK-NEXT: [[TMP18:%.*]] = fmul contract double [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = call contract double @llvm.fma.f64(double [[TMP18]], double [[TMP14]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP20:%.*]] = fmul contract double [[TMP14]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = call contract double @llvm.fma.f64(double [[TMP19]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP22:%.*]] = fneg contract double [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = call contract double @llvm.fma.f64(double [[TMP20]], double [[TMP22]], double [[TMP14]])
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT: [[FDIV:%.*]] = insertelement <2 x double> [[TMP24]], double [[TMP23]], i64 1
; CHECK-NEXT: ret <2 x double> [[FDIV]]
;
%sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -161,7 +295,22 @@ define <2 x double> @rsq_some_elements_v2f64(<2 x double> %x) {
; CHECK-LABEL: define <2 x double> @rsq_some_elements_v2f64(
; CHECK-SAME: <2 x double> [[X:%.*]]) {
; CHECK-NEXT: [[SQRT_X:%.*]] = call contract <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract <2 x double> <double 1.000000e+00, double 2.000000e+00>, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SQRT_X]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[TMP3]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP3]], i32 608)
+; CHECK-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], double [[TMP5]], double [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = fneg contract double [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = fmul contract double [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP5]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP11:%.*]] = fmul contract double [[TMP5]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = call contract double @llvm.fma.f64(double [[TMP10]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP13:%.*]] = call contract double @llvm.fma.f64(double [[TMP11]], double [[TMP12]], double [[TMP5]])
+; CHECK-NEXT: [[TMP14:%.*]] = fdiv contract double 2.000000e+00, [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
+; CHECK-NEXT: [[FDIV:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i64 1
; CHECK-NEXT: ret <2 x double> [[FDIV]]
;
%sqrt.x = call contract <2 x double> @llvm.sqrt.f64(<2 x double> %x)
@@ -324,8 +473,15 @@ define double @rsq_amdgcn_f64_nnan_ninf(double %x) {
define double @rsq_f64_input_known_not_zero(double nofpclass(zero) %x) {
; CHECK-LABEL: define double @rsq_f64_input_known_not_zero(
; CHECK-SAME: double nofpclass(zero) [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -336,8 +492,15 @@ define double @rsq_f64_input_known_not_zero(double nofpclass(zero) %x) {
define double @rsq_f64_input_known_not_pinf(double nofpclass(pinf) %x) {
; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf(
; CHECK-SAME: double nofpclass(pinf) [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -348,8 +511,13 @@ define double @rsq_f64_input_known_not_pinf(double nofpclass(pinf) %x) {
define double @rsq_f64_input_known_not_pinf_zero(double nofpclass(pinf zero) %x) {
; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero(
; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -360,8 +528,15 @@ define double @rsq_f64_input_known_not_pinf_zero(double nofpclass(pinf zero) %x)
define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(double nofpclass(pinf zero) %x) #0 {
; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(
; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -372,8 +547,15 @@ define double @rsq_f64_input_known_not_pinf_zero_dynamic_fp(double nofpclass(pin
define double @rsq_f64_input_known_not_pinf_zero_daz(double nofpclass(pinf zero) %x) #1 {
; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero_daz(
; CHECK-SAME: double nofpclass(pinf zero) [[X:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -384,8 +566,13 @@ define double @rsq_f64_input_known_not_pinf_zero_daz(double nofpclass(pinf zero)
define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(double nofpclass(pinf zero sub) %x) #1 {
; CHECK-LABEL: define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(
; CHECK-SAME: double nofpclass(pinf zero sub) [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -396,8 +583,17 @@ define double @rsq_f64_input_known_not_pinf_zero_denorm_daz(double nofpclass(pin
define double @rsq_f64_dynamic_denormal(double %x) #0 {
; CHECK-LABEL: define double @rsq_f64_dynamic_denormal(
; CHECK-SAME: double [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP4]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP6:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP9:%.*]] = fmul contract double [[TMP1]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = call contract double @llvm.fma.f64(double [[TMP8]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP9]], double [[TMP10]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -408,8 +604,15 @@ define double @rsq_f64_dynamic_denormal(double %x) #0 {
define double @rsq_f64_dynamic_denormal_no_pinf(double nofpclass(pinf) %x) #0 {
; CHECK-LABEL: define double @rsq_f64_dynamic_denormal_no_pinf(
; CHECK-SAME: double nofpclass(pinf) [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -420,8 +623,15 @@ define double @rsq_f64_dynamic_denormal_no_pinf(double nofpclass(pinf) %x) #0 {
define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(double nofpclass(zero sub) %x) #0 {
; CHECK-LABEL: define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(
; CHECK-SAME: double nofpclass(zero sub) [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -432,8 +642,15 @@ define double @rsq_f64_dynamic_denormal_no_zero_no_denorm(double nofpclass(zero
define double @rsq_f64_nnan_sqrt(double %x) {
; CHECK-LABEL: define double @rsq_f64_nnan_sqrt(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -444,8 +661,15 @@ define double @rsq_f64_nnan_sqrt(double %x) {
define double @rsq_f64_nnan_fdiv(double %x) {
; CHECK-LABEL: define double @rsq_f64_nnan_fdiv(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -456,8 +680,15 @@ define double @rsq_f64_nnan_fdiv(double %x) {
define double @rsq_f64_ninf_sqrt(double %x) {
; CHECK-LABEL: define double @rsq_f64_ninf_sqrt(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -468,8 +699,13 @@ define double @rsq_f64_ninf_sqrt(double %x) {
define double @rsq_f64_ninf_fdiv(double %x) {
; CHECK-LABEL: define double @rsq_f64_ninf_fdiv(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -480,8 +716,13 @@ define double @rsq_f64_ninf_fdiv(double %x) {
define double @rsq_f64_ninf_sqrt_nnan_fdiv(double %x) {
; CHECK-LABEL: define double @rsq_f64_ninf_sqrt_nnan_fdiv(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -492,8 +733,13 @@ define double @rsq_f64_ninf_sqrt_nnan_fdiv(double %x) {
define double @rsq_f64_nann_sqrt_ninf_fdiv(double %x) {
; CHECK-LABEL: define double @rsq_f64_nann_sqrt_ninf_fdiv(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -506,8 +752,15 @@ define double @rsq_f64_assume_nonzero(double %x) {
; CHECK-SAME: double [[X:%.*]]) {
; CHECK-NEXT: [[NONZERO:%.*]] = fcmp one double [[X]], 0.000000e+00
; CHECK-NEXT: call void @llvm.assume(i1 [[NONZERO]])
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%nonzero = fcmp one double %x, 0.0
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index e34fdd9ae6902..519afd8feba28 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -1,9 +1,16 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,SI-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,SI-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG,VI-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL,VI-GISEL %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; Test amdgpu-codegenprepare implementation of rsq formation
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-IR,SI-SDAG,SI-SDAG-IR %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-IR,SI-GISEL,SI-GISEL-IR %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-IR,VI-SDAG,VI-SDAG-IR %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-IR,VI-GISEL,VI-GISEL-IR %s
+
+; Test codegen implementation.
+; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-CG,SI-SDAG,SI-SDAG-CG %s
+; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-CG,SI-GISEL,SI-GISEL-CG %s
+; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-CG,VI-SDAG,VI-SDAG-CG %s
+; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-CG,VI-GISEL,VI-GISEL-CG %s
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.readfirstlane(i32)
@@ -13,20 +20,1548 @@ declare double @llvm.amdgcn.sqrt.f64(double)
declare double @llvm.fabs.f64(double)
define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
-; SI-SDAG-LABEL: s_rsq_f64:
+; SI-SDAG-IR-LABEL: s_rsq_f64:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s1
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_rsq_f64:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s1
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_rsq_f64:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s1
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_rsq_f64:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s1
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_rsq_f64:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT: s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_rsq_f64:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_rsq_f64:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_rsq_f64:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT: ; return to shader part epilog
+ %rsq = call contract double @llvm.sqrt.f64(double %x)
+ %result = fdiv contract double 1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
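+; The fabs folds into the source modifiers of the rsq and the class check
+; (|s[0:1]|).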
+define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_rsq_f64_fabs:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: v_rsq_f64_e64 v[0:1], |s[0:1]|
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT: s_and_b32 s2, s1, 0x7fffffff
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_rsq_f64_fabs:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: v_rsq_f64_e64 v[0:1], |s[0:1]|
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT: s_and_b32 s2, s1, 0x7fffffff
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s2
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_rsq_f64_fabs:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: v_rsq_f64_e64 v[0:1], |s[0:1]|
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; VI-SDAG-IR-NEXT: s_and_b32 s2, s1, 0x7fffffff
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s2
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_rsq_f64_fabs:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: v_rsq_f64_e64 v[0:1], |s[0:1]|
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e64 vcc, |s[0:1]|, v2
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT: s_and_b32 s0, s1, 0x7fffffff
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s0
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_rsq_f64_fabs:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT: v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; SI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT: s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_rsq_f64_fabs:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_rsq_f64_fabs:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT: v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
+; VI-SDAG-CG-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; VI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; VI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_rsq_f64_fabs:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT: ; return to shader part epilog
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %rsq = call contract double @llvm.sqrt.f64(double %fabs.x)
+ %result = fdiv contract double 1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
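+; The -1.0 numerator is folded into the expansion as a neg source modifier
+; on the final fma rather than a separate negate of the result.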
+define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_neg_rsq_f64:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s1
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_neg_rsq_f64:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s1
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_neg_rsq_f64:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s1
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_neg_rsq_f64:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s1
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_neg_rsq_f64:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT: s_mov_b32 s2, 0xbff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_neg_rsq_f64:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_neg_rsq_f64:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_neg_rsq_f64:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT: ; return to shader part epilog
+ %rsq = call contract double @llvm.sqrt.f64(double %x)
+ %result = fdiv contract double -1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
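+; The fneg of the input folds as a source modifier on both the rsq and the
+; class check (-s[0:1]); the -1.0 numerator is again absorbed by the final
+; fma's neg modifier.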
+define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_neg_rsq_neg_f64:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: v_rsq_f64_e64 v[0:1], -s[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT: s_xor_b32 s2, s1, 0x80000000
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_neg_rsq_neg_f64:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: v_rsq_f64_e64 v[0:1], -s[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT: s_xor_b32 s2, s1, 0x80000000
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s2
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_neg_rsq_neg_f64:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: v_rsq_f64_e64 v[0:1], -s[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; VI-SDAG-IR-NEXT: s_xor_b32 s2, s1, 0x80000000
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s2
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_neg_rsq_neg_f64:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: v_rsq_f64_e64 v[0:1], -s[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e64 vcc, -s[0:1], v2
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT: s_xor_b32 s0, s1, 0x80000000
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s0
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_neg_rsq_neg_f64:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 9
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
+; SI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT: s_mov_b32 s2, 0xbff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_neg_rsq_neg_f64:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_neg_rsq_neg_f64:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 9
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
+; VI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_neg_rsq_neg_f64:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT: ; return to shader part epilog
+ %x.neg = fneg double %x
+ %rsq = call contract double @llvm.sqrt.f64(double %x.neg)
+ %result = fdiv contract double -1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
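+; Same expansion with a divergent (VGPR) input. The *-CG runs pre-scale
+; small inputs by 2^256 with v_ldexp_f64 and rescale the sqrt result by
+; 2^-128 before the reciprocal.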
+define double @v_rsq_f64(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64_fabs(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64_fabs:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v5, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e64 vcc, |v[0:1]|, v5
+; SI-SDAG-IR-NEXT: v_and_b32_e32 v4, 0x7fffffff, v1
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64_fabs:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e64 vcc, |v[0:1]|, v5
+; SI-GISEL-IR-NEXT: v_and_b32_e32 v4, 0x7fffffff, v1
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64_fabs:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e64 vcc, |v[0:1]|, v4
+; VI-SDAG-IR-NEXT: v_and_b32_e32 v5, 0x7fffffff, v1
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64_fabs:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e64 vcc, |v[0:1]|, v4
+; VI-GISEL-IR-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64_fabs:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64_fabs:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64_fabs:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64_fabs:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %sqrt = call contract double @llvm.sqrt.f64(double %fabs.x)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64_missing_contract0(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract0:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT: s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -36,36 +1571,37 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT: ; return to shader part epilog
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: s_rsq_f64:
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
@@ -83,12 +1619,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -97,20 +1633,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT: ; return to shader part epilog
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: s_rsq_f64:
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract0:
; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
@@ -121,12 +1654,14 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -137,18 +1672,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT: ; return to shader part epilog
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: s_rsq_f64:
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -166,7 +1700,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -177,36 +1711,26 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT: ; return to shader part epilog
- %rsq = call contract double @llvm.sqrt.f64(double %x)
- %result = fdiv contract double 1.0, %rsq
- %cast = bitcast double %result to <2 x i32>
- %cast.0 = extractelement <2 x i32> %cast, i32 0
- %cast.1 = extractelement <2 x i32> %cast, i32 1
- %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
- %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
- %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
- %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
- ret <2 x i32> %insert.1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
}
-define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
-; SI-SDAG-LABEL: s_rsq_f64_fabs:
+define double @v_rsq_f64_missing_contract1(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract1:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT: v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; SI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
-; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT: s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -216,36 +1740,37 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT: ; return to shader part epilog
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: s_rsq_f64_fabs:
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
@@ -263,12 +1788,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -277,20 +1802,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT: ; return to shader part epilog
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: s_rsq_f64_fabs:
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract1:
; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT: v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
-; VI-SDAG-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; VI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
-; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
@@ -301,14 +1823,16 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
@@ -317,18 +1841,17 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT: ; return to shader part epilog
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: s_rsq_f64_fabs:
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -346,7 +1869,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -357,3719 +1880,843 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT: ; return to shader part epilog
- %fabs.x = call double @llvm.fabs.f64(double %x)
- %rsq = call contract double @llvm.sqrt.f64(double %fabs.x)
- %result = fdiv contract double 1.0, %rsq
- %cast = bitcast double %result to <2 x i32>
- %cast.0 = extractelement <2 x i32> %cast, i32 0
- %cast.1 = extractelement <2 x i32> %cast, i32 1
- %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
- %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
- %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
- %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
- ret <2 x i32> %insert.1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv double 1.0, %sqrt
+ ret double %rsq
}
-define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
-; SI-SDAG-LABEL: s_neg_rsq_f64:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT: s_mov_b32 s2, 0xbff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT: ; return to shader part epilog
+define double @v_neg_rsq_f64(double %x) {
+; SI-SDAG-IR-LABEL: v_neg_rsq_f64:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: s_neg_rsq_f64:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT: ; return to shader part epilog
+; SI-GISEL-IR-LABEL: v_neg_rsq_f64:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: s_neg_rsq_f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT: ; return to shader part epilog
+; VI-SDAG-IR-LABEL: v_neg_rsq_f64:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: s_neg_rsq_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT: ; return to shader part epilog
- %rsq = call contract double @llvm.sqrt.f64(double %x)
- %result = fdiv contract double -1.0, %rsq
- %cast = bitcast double %result to <2 x i32>
- %cast.0 = extractelement <2 x i32> %cast, i32 0
- %cast.1 = extractelement <2 x i32> %cast, i32 1
- %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
- %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
- %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
- %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
- ret <2 x i32> %insert.1
-}
-
-define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
-; SI-SDAG-LABEL: s_neg_rsq_neg_f64:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT: v_bfrev_b32_e32 v1, 9
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
-; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT: s_mov_b32 s2, 0xbff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT: ; return to shader part epilog
+; VI-GISEL-IR-LABEL: v_neg_rsq_f64:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: s_neg_rsq_neg_f64:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT: ; return to shader part epilog
+; SI-SDAG-CG-LABEL: v_neg_rsq_f64:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: s_neg_rsq_neg_f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT: v_bfrev_b32_e32 v1, 9
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
-; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT: ; return to shader part epilog
+; SI-GISEL-CG-LABEL: v_neg_rsq_f64:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: s_neg_rsq_neg_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT: ; return to shader part epilog
- %x.neg = fneg double %x
- %rsq = call contract double @llvm.sqrt.f64(double %x.neg)
- %result = fdiv contract double -1.0, %rsq
- %cast = bitcast double %result to <2 x i32>
- %cast.0 = extractelement <2 x i32> %cast, i32 0
- %cast.1 = extractelement <2 x i32> %cast, i32 1
- %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
- %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
- %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
- %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
- ret <2 x i32> %insert.1
+; VI-SDAG-CG-LABEL: v_neg_rsq_f64:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_neg_rsq_f64:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract double -1.0, %sqrt
+ ret double %rsq
}
-define double @v_rsq_f64(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64:
+define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_rsq_v2f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0
; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5]
+; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
+; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-SDAG-NEXT: v_mov_b32_e32 v15, 0x260
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
+; SI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v10
+; SI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], v15
+; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[12:13], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
+; SI-SDAG-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[10:11]
+; SI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7
+; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
+; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
+; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[18:19], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v11
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; SI-SDAG-NEXT: s_nop 0
+; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_rsq_f64:
+; SI-GISEL-LABEL: v_rsq_v2f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
+; SI-GISEL-NEXT: v_mov_b32_e32 v18, 0x3ff00000
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v10, 8, v12
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v10
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[12:13], v[10:11]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v18
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
+; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[16:17], v[6:7]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_rsq_f64:
+; VI-SDAG-LABEL: v_rsq_v2f64:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s4, 0
; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64_fabs(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_fabs:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_fabs:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_fabs:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_fabs:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call double @llvm.fabs.f64(double %x)
- %sqrt = call contract double @llvm.sqrt.f64(double %fabs.x)
- %rsq = fdiv contract double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64_missing_contract0(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_missing_contract0:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_missing_contract0:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64_missing_contract1(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_missing_contract1:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_missing_contract1:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract double @llvm.sqrt.f64(double %x)
- %rsq = fdiv double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_neg_rsq_f64(double %x) {
-; SI-SDAG-LABEL: v_neg_rsq_f64:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_f64:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract double -1.0, %sqrt
- ret double %rsq
-}
-
-define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
-; SI-SDAG-LABEL: v_rsq_v2f64:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
-; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-SDAG-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v10
-; SI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], v15
-; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[12:13], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
-; SI-SDAG-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
-; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[10:11]
-; SI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7
-; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
-; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
-; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[18:19], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v11
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v19
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; SI-SDAG-NEXT: s_nop 0
-; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_v2f64:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_mov_b32_e32 v18, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v10, 8, v12
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v10
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[12:13], v[10:11]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v18
-; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
-; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
-; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
-; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[16:17], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18
-; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_v2f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
-; VI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
-; VI-SDAG-NEXT: v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[9:10], v[5:6]
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[11:12], v[7:8]
-; VI-SDAG-NEXT: v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
-; VI-SDAG-NEXT: v_div_scale_f64 v[13:14], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
-; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
-; VI-SDAG-NEXT: v_mul_f64 v[15:16], v[13:14], v[9:10]
-; VI-SDAG-NEXT: v_mul_f64 v[19:20], v[17:18], v[11:12]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
-; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_v2f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
-; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
-; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
- %rsq = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
- ret <2 x double> %rsq
-}
-
-define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
-; SI-SDAG-LABEL: v_neg_rsq_v2f64:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
-; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-SDAG-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v10
-; SI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], v15
-; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[12:13], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
-; SI-SDAG-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
-; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[10:11]
-; SI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7
-; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
-; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
-; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[18:19], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v11
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v19
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-SDAG-NEXT: s_nop 0
-; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_v2f64:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_mov_b32_e32 v18, 0xbff00000
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v10, 8, v12
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v10
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[12:13], v[10:11]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v18
-; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
-; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
-; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[6:7], -1.0, v[2:3], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
-; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[16:17], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18
-; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_v2f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
-; VI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], -1.0
-; VI-SDAG-NEXT: v_div_scale_f64 v[17:18], s[4:5], -1.0, v[2:3], -1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[9:10], v[5:6]
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[11:12], v[7:8]
-; VI-SDAG-NEXT: v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
-; VI-SDAG-NEXT: v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
-; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
-; VI-SDAG-NEXT: v_mul_f64 v[15:16], v[13:14], v[9:10]
-; VI-SDAG-NEXT: v_mul_f64 v[19:20], v[17:18], v[11:12]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
-; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_v2f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
-; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
- %rsq = fdiv <2 x double> <double -1.0, double -1.0>, %sqrt
- ret <2 x double> %rsq
-}
-
-define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
-; SI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v12
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[10:11]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[14:15], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
-; SI-GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
-; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[12:13], v[16:17], v[10:11]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v17
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-GISEL-NEXT: s_nop 0
-; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
- %rsq = fdiv <2 x double> <double -1.0, double poison>, %sqrt
- ret <2 x double> %rsq
-}
-
-define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
-; SI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
-; SI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
-; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
-; SI-SDAG-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v10
-; SI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], v15
-; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[12:13], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
-; SI-SDAG-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
-; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[10:11]
-; SI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7
-; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
-; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
-; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[18:19], v[6:7]
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3ff00000
-; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v11
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, v19
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-SDAG-NEXT: s_nop 0
-; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v12
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[10:11]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
-; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[14:15], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
-; SI-GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
-; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[12:13], v[16:17], v[10:11]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v17, v6
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; SI-GISEL-NEXT: s_nop 0
-; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
-; VI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
-; VI-SDAG-NEXT: v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[9:10], v[5:6]
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[11:12], v[7:8]
-; VI-SDAG-NEXT: v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
-; VI-SDAG-NEXT: v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
-; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
-; VI-SDAG-NEXT: v_mul_f64 v[15:16], v[13:14], v[9:10]
-; VI-SDAG-NEXT: v_mul_f64 v[19:20], v[17:18], v[11:12]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
-; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
-; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
-; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
-; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
-; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
-; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
- %rsq = fdiv <2 x double> <double -1.0, double 1.0>, %sqrt
- ret <2 x double> %rsq
-}
-
-define double @v_rsq_f64_fneg_fabs(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 9
-; SI-SDAG-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 9
-; VI-SDAG-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %fabs = call double @llvm.fabs.f64(double %x)
- %fneg.fabs = fneg double %fabs
- %sqrt = call contract double @llvm.sqrt.f64(double %fneg.fabs)
- %rsq = fdiv contract double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64__afn_sqrt(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
-; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64__afn_fdiv(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_fdiv:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_fdiv:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_fdiv:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_fdiv:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64__afn(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_neg_rsq_f64__afn(double %x) {
-; SI-SDAG-LABEL: v_neg_rsq_f64__afn:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_neg_rsq_f64__afn:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_neg_rsq_f64__afn:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_f64__afn:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn double -1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64__afn_ninf(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_ninf:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_ninf:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_ninf:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_ninf:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn ninf double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64__afn_nnan(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_nnan:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_nnan:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_nnan:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_f64__afn_nnan:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn nnan double 1.0, %sqrt
- ret double %rsq
-}
-
-define double @v_rsq_f64__afn_nnan_ninf(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_rsq_f64__afn_nnan_ninf:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; VI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; VI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
+; VI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[9:10], v[5:6]
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[11:12], v[7:8]
+; VI-SDAG-NEXT: v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
+; VI-SDAG-NEXT: v_div_scale_f64 v[13:14], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
+; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
+; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
+; VI-SDAG-NEXT: v_mul_f64 v[15:16], v[13:14], v[9:10]
+; VI-SDAG-NEXT: v_mul_f64 v[19:20], v[17:18], v[11:12]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
+; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-LABEL: v_rsq_v2f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
- ret double %rsq
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
+ ret <2 x double> %rsq
}
-define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
-; SI-SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0
; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5]
+; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
+; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-SDAG-NEXT: v_mov_b32_e32 v15, 0x260
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
+; SI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v10
+; SI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], v15
+; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[12:13], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
+; SI-SDAG-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[10:11]
+; SI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7
+; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
+; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
+; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[18:19], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v11
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT: s_nop 0
+; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-LABEL: v_neg_rsq_v2f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
+; SI-GISEL-NEXT: v_mov_b32_e32 v18, 0xbff00000
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v10, 8, v12
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v10
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[12:13], v[10:11]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[10:11], v[12:13], 1.0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[10:11], v[6:7], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v18
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[10:11], v[14:15], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[8:9], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[16:17], v[6:7]
+; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[6:7], -1.0, v[2:3], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 1.0
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[14:15]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[16:17], v[6:7]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v17, v18
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[16:17]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[10:11]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-LABEL: v_neg_rsq_v2f64:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s4, 0
; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; VI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; VI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
+; VI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], -1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[17:18], s[4:5], -1.0, v[2:3], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[9:10], v[5:6]
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[11:12], v[7:8]
+; VI-SDAG-NEXT: v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
+; VI-SDAG-NEXT: v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
+; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
+; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
+; VI-SDAG-NEXT: v_mul_f64 v[15:16], v[13:14], v[9:10]
+; VI-SDAG-NEXT: v_mul_f64 v[19:20], v[17:18], v[11:12]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
+; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-LABEL: v_neg_rsq_v2f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
- ret double %rsq
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double -1.0, double -1.0>, %sqrt
+ ret <2 x double> %rsq
}
-define double @v_rsq_f64__nnan_ninf(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0
@@ -4081,7 +2728,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -4096,12 +2743,12 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
@@ -4109,53 +2756,89 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
+; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[10:11]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[14:15], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[12:13], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: v_mul_f64 v[12:13], v[16:17], v[10:11]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v17
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-GISEL-NEXT: s_nop 0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s4, 0
@@ -4181,124 +2864,174 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract nnan ninf double @llvm.sqrt.f64(double %x)
- %rsq = fdiv contract nnan ninf double 1.0, %sqrt
- ret double %rsq
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double -1.0, double poison>, %sqrt
+ ret <2 x double> %rsq
}
-define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
-; SI-SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0
; SI-SDAG-NEXT: s_brev_b32 s5, 8
; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5]
+; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; SI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
+; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
; SI-SDAG-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[10:11]
; SI-SDAG-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-NEXT: v_mul_f64 v[6:7], v[0:1], v[8:9]
-; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[8:9], 0.5
-; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v12
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
+; SI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v10
+; SI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], v15
+; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[16:17], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[12:13], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], 1.0
; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
+; SI-SDAG-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[6:7], v[8:9], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[10:11]
+; SI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7
+; SI-SDAG-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], v[12:13]
+; SI-SDAG-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[8:9], v[18:19], v[8:9]
+; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[18:19], v[6:7]
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3ff00000
+; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
+; SI-SDAG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[18:19]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v11
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, v19
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT: s_nop 0
+; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
@@ -4330,36 +3063,53 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[6:7], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[10:11]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[6:7], 1.0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[10:11], v[4:5], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[14:15], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[12:13], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[10:11], v[8:9], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[6:7], v[14:15], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v11
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[14:15], v[12:13], v[14:15]
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[12:13], v[16:17], v[10:11]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[18:19], v[4:5], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[12:13], v[16:17]
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v17, v6
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-GISEL-NEXT: s_nop 0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[10:11], v[12:13]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s4, 0
@@ -4369,57 +3119,66 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
; VI-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; VI-SDAG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
; VI-SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; VI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
; VI-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; VI-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[5:6], v[0:1]
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[7:8], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[9:10], -v[0:1], v[5:6], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[11:12], -v[2:3], v[7:8], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[9:10], v[5:6], v[5:6]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[11:12], v[7:8], v[7:8]
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[9:10], v[5:6]
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[11:12], v[7:8]
+; VI-SDAG-NEXT: v_fma_f64 v[13:14], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[13:14], v[9:10]
+; VI-SDAG-NEXT: v_div_scale_f64 v[13:14], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[15:16], v[11:12]
+; VI-SDAG-NEXT: v_fma_f64 v[15:16], -v[5:6], v[9:10], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[19:20], -v[7:8], v[11:12], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[9:10], v[9:10], v[15:16], v[9:10]
+; VI-SDAG-NEXT: v_fma_f64 v[11:12], v[11:12], v[19:20], v[11:12]
+; VI-SDAG-NEXT: v_mul_f64 v[15:16], v[13:14], v[9:10]
+; VI-SDAG-NEXT: v_mul_f64 v[19:20], v[17:18], v[11:12]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[5:6], v[15:16], v[13:14]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[7:8], v[19:20], v[17:18]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[9:10], v[15:16]
+; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[11:12], v[19:20]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
@@ -4427,10 +3186,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -4463,171 +3222,2735 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
- %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double -1.0, double 1.0>, %sqrt
ret <2 x double> %rsq
}
-define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
-; SI-SDAG-LABEL: s_rsq_f64_unsafe:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; SI-SDAG-NEXT: ; return to shader part epilog
+define double @v_rsq_f64_fneg_fabs(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64_fneg_fabs:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_brev_b32 s5, 1
+; SI-SDAG-IR-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; SI-SDAG-IR-NEXT: v_or_b32_e32 v4, 0x80000000, v1
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64_fneg_fabs:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; SI-GISEL-IR-NEXT: v_cmp_eq_f64_e64 vcc, -|v[0:1]|, 0
+; SI-GISEL-IR-NEXT: v_or_b32_e32 v4, 0x80000000, v1
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64_fneg_fabs:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_brev_b32 s5, 1
+; VI-SDAG-IR-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; VI-SDAG-IR-NEXT: v_or_b32_e32 v4, 0x80000000, v1
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64_fneg_fabs:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e64 v[2:3], -|v[0:1]|
+; VI-GISEL-IR-NEXT: v_cmp_eq_f64_e64 vcc, -|v[0:1]|, 0
+; VI-GISEL-IR-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64_fneg_fabs:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 9
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64_fneg_fabs:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64_fneg_fabs:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 9
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64_fneg_fabs:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %fabs = call double @llvm.fabs.f64(double %x)
+ %fneg.fabs = fneg double %fabs
+ %sqrt = call contract double @llvm.sqrt.f64(double %fneg.fabs)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
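(The new *-IR check blocks above all reduce to the same shape: v_rsq_f64, a
class-test-based select, then a short mul/fma fixup. Below is a minimal C
sketch of that shape as read off the SI-SDAG-IR lines; rsq_estimate is a
hypothetical stand-in for v_rsq_f64, and the 0x260 class mask is spelled out
as plain comparisons. Illustration only, not the pass's code.)

#include <math.h>

/* Hypothetical stand-in for v_rsq_f64; the real instruction is an
   approximation, which is what the fixup below corrects. */
static double rsq_estimate(double x) { return 1.0 / sqrt(x); }

static double rsq_f64(double x) {
  double y0 = rsq_estimate(x);
  /* 0x260 class mask = -0, +0, +inf. For those inputs v_rsq_f64
     already returns the correct special value; feeding y0 back in
     instead of x keeps the fixup from turning it into a NaN
     (e.g. a 0 * inf inside the fma). */
  double sel = (x == 0.0 || (isinf(x) && x > 0.0)) ? y0 : x;
  double e = fma(-y0 * sel, y0, 1.0);
  return fma(y0 * e, fma(e, 0.375, 0.5), y0);
}

(The select is the only special-case handling: it lets the +/-0 and +inf
results of the estimate survive the two fmas unchanged.)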
+define double @v_rsq_f64__afn_sqrt(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_sqrt:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_sqrt:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_sqrt:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_sqrt:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
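(By contrast, the *-CG check blocks keep the pre-existing lowering: a
Goldschmidt sqrt refinement seeded by v_rsq_f64, i.e. the mul/fma ladder
between the two v_ldexp_f64 scalings above. A C sketch of that ladder,
again with rsq_estimate as a hypothetical stand-in and with the
ldexp-based denormal scaling omitted:)

#include <math.h>

static double rsq_estimate(double x) { return 1.0 / sqrt(x); }

static double sqrt_refine(double x) {
  double y = rsq_estimate(x);
  double g = x * y;           /* running sqrt(x) estimate      */
  double h = y * 0.5;         /* running 0.5/sqrt(x) estimate  */
  double r = fma(-h, g, 0.5); /* Goldschmidt correction term   */
  g = fma(g, r, g);
  h = fma(h, r, h);
  double d = fma(-g, g, x);   /* residual x - g*g              */
  g = fma(d, h, g);
  d = fma(-g, g, x);
  return fma(d, h, g);
}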
+define double @v_rsq_f64__afn_fdiv(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_fdiv:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_fdiv:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_fdiv:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_fdiv:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_fdiv:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_fdiv:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_fdiv:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_fdiv:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn double 1.0, %sqrt
+ ret double %rsq
+}
+
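(For the afn-fdiv case the *-CG blocks above finish with three
Newton-Raphson steps on v_rcp_f64 instead of the div_scale/div_fmas
sequence. A sketch of those steps, with rcp_estimate as a hypothetical
stand-in for v_rcp_f64 and the scaling again omitted:)

#include <math.h>

static double rcp_estimate(double x) { return 1.0 / x; }

static double rcp_refine(double x) {
  double r = rcp_estimate(x);
  double e = fma(-x, r, 1.0); /* error term 1 - x*r */
  r = fma(e, r, r);
  e = fma(-x, r, 1.0);
  r = fma(e, r, r);
  e = fma(-x, r, 1.0);
  return fma(e, r, r);
}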
+define double @v_rsq_f64__afn(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn(double %x) {
+; SI-SDAG-IR-LABEL: v_neg_rsq_f64__afn:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn double -1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_ninf:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_ninf:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_ninf:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_ninf:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn ninf double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x7ff00000
+; SI-SDAG-IR-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x7ff00000
+; SI-GISEL-IR-NEXT: v_cmp_eq_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x7ff00000
+; VI-SDAG-IR-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x7ff00000
+; VI-GISEL-IR-NEXT: v_cmp_eq_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn nnan double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__nnan_ninf(double %x) {
+; SI-SDAG-IR-LABEL: v_rsq_f64__nnan_ninf:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-IR-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-IR-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64__nnan_ninf:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-CG-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-CG-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-CG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-CG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract nnan ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract nnan ninf double 1.0, %sqrt
+ ret double %rsq
+}
+
+define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
+; SI-SDAG-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; SI-SDAG-IR-NEXT: v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[4:5], v[4:5], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[6:7], v[6:7], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[10:11], v[6:7], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: s_rsq_f64_unsafe:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; SI-GISEL-NEXT: ; return to shader part epilog
+; SI-GISEL-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v10, 0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v11, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], 0.5
+; SI-GISEL-IR-NEXT: v_mul_f64 v[12:13], v[2:3], v[6:7]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[12:13], v[6:7], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: s_rsq_f64_unsafe:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; VI-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT: ; return to shader part epilog
-;
-; VI-GISEL-LABEL: s_rsq_f64_unsafe:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT: ; return to shader part epilog
+; VI-SDAG-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; VI-SDAG-IR-NEXT: v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[4:5], v[4:5], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[6:7], v[6:7], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[8:9], v[4:5], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[10:11], v[6:7], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-IR-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v8, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v9, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_mul_f64 v[4:5], -v[0:1], v[0:1]
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], -v[2:3], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[4:5], v[4:5], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_fma_f64 v[6:7], v[6:7], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[10:11], v[0:1], v[4:5]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], 0.5
+; VI-GISEL-IR-NEXT: v_mul_f64 v[12:13], v[2:3], v[6:7]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[10:11], v[4:5], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[12:13], v[6:7], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v12, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v15, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[8:9]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[8:9], 0.5
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v12
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v12, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[5:6], v[0:1]
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[7:8], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[9:10], -v[0:1], v[5:6], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[11:12], -v[2:3], v[7:8], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[9:10], v[5:6], v[5:6]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[11:12], v[7:8], v[7:8]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
+ ret <2 x double> %rsq
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
+; SI-SDAG-IR-LABEL: s_rsq_f64_unsafe:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s1
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-IR-LABEL: s_rsq_f64_unsafe:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s1
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-IR-LABEL: s_rsq_f64_unsafe:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v3, s1
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-IR-NEXT: s_mov_b32 s0, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s1, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-IR-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-IR-LABEL: s_rsq_f64_unsafe:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v2, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v2
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, s1
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[0:1], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-IR-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-IR-NEXT: ; return to shader part epilog
+;
+; SI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-CG-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-CG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-CG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-CG-NEXT: ; return to shader part epilog
%rsq = call contract afn double @llvm.sqrt.f64(double %x)
%result = fdiv contract afn double 1.0, %rsq
%cast = bitcast double %result to <2 x i32>
@@ -4641,145 +5964,213 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
}
define double @v_rsq_f64_unsafe(double %x) {
-; SI-SDAG-LABEL: v_rsq_f64_unsafe:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-IR-LABEL: v_rsq_f64_unsafe:
+; SI-SDAG-IR: ; %bb.0:
+; SI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; SI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_rsq_f64_unsafe:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-IR-LABEL: v_rsq_f64_unsafe:
+; SI-GISEL-IR: ; %bb.0:
+; SI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; SI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; SI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; SI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; SI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; SI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_rsq_f64_unsafe:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-IR-LABEL: v_rsq_f64_unsafe:
+; VI-SDAG-IR: ; %bb.0:
+; VI-SDAG-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-SDAG-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-SDAG-IR-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-IR-NEXT: s_mov_b32 s5, 0x3fd80000
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-SDAG-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-SDAG-IR-NEXT: v_mul_f64 v[4:5], v[2:3], v[0:1]
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], 0.5
+; VI-SDAG-IR-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-IR-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: v_rsq_f64_unsafe:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-IR-LABEL: v_rsq_f64_unsafe:
+; VI-GISEL-IR: ; %bb.0:
+; VI-GISEL-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-IR-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0x260
+; VI-GISEL-IR-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-IR-NEXT: v_mov_b32_e32 v5, 0x3fd80000
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-IR-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; VI-GISEL-IR-NEXT: v_mul_f64 v[0:1], -v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], 1.0
+; VI-GISEL-IR-NEXT: v_mul_f64 v[6:7], v[2:3], v[0:1]
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], 0.5
+; VI-GISEL-IR-NEXT: v_fma_f64 v[0:1], v[6:7], v[0:1], v[2:3]
+; VI-GISEL-IR-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
+; SI-SDAG-CG: ; %bb.0:
+; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
+; SI-GISEL-CG: ; %bb.0:
+; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
+; VI-SDAG-CG: ; %bb.0:
+; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
+; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
+; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-SDAG-CG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
+; VI-GISEL-CG: ; %bb.0:
+; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; VI-GISEL-CG-NEXT: s_setpc_b64 s[30:31]
%sqrt = call afn contract double @llvm.sqrt.f64(double %x)
%rsq = fdiv afn contract double 1.0, %sqrt
ret double %rsq
@@ -4828,39 +6219,22 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_rsq_amdgcn_sqrt_f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_rsq_amdgcn_sqrt_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_rsq_amdgcn_sqrt_f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
%rsq = fdiv contract double 1.0, %sqrt
ret double %rsq
@@ -4909,39 +6283,22 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SDAG-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
%rsq = fdiv contract double -1.0, %sqrt
ret double %rsq
@@ -4992,41 +6349,23 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; SI-GISEL-NEXT: ; return to shader part epilog
;
-; VI-SDAG-LABEL: s_rsq_amdgcn_sqrt_f64:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
-; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
-; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
-; VI-SDAG-NEXT: ; return to shader part epilog
-;
-; VI-GISEL-LABEL: s_rsq_amdgcn_sqrt_f64:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
-; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; VI-GISEL-NEXT: ; return to shader part epilog
+; VI-LABEL: s_rsq_amdgcn_sqrt_f64:
+; VI: ; %bb.0:
+; VI-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-NEXT: v_readfirstlane_b32 s0, v0
+; VI-NEXT: v_readfirstlane_b32 s1, v1
+; VI-NEXT: ; return to shader part epilog
%rsq = call contract double @llvm.amdgcn.sqrt.f64(double %x)
%result = fdiv contract double 1.0, %rsq
%cast = bitcast double %result to <2 x i32>
@@ -5718,6 +7057,8 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
-; GISEL: {{.*}}
-; SDAG: {{.*}}
+; SI: {{.*}}
+; SI-CG: {{.*}}
+; SI-IR: {{.*}}
+; VI-CG: {{.*}}
+; VI-IR: {{.*}}