[llvm-branch-commits] [llvm] AMDGPU: Skip last corrections and scaling for afn llvm.sqrt.f64 (PR #183697)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 27 00:05:08 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/183697
>From db56f215d788194471288263bf5d72012cbfdd31 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 27 Feb 2026 08:06:08 +0100
Subject: [PATCH] AMDGPU: Skip last corrections and scaling for afn
llvm.sqrt.f64
Device libs already has a fast sqrt macro implemented this way: it omits the input/output scaling and the final correction step.
---
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 39 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 40 +-
llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll | 892 +++++-------------
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 742 +++------------
4 files changed, 425 insertions(+), 1288 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c117c6bf4ddd9..1b9028eb41487 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5856,18 +5856,21 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
Register X = MI.getOperand(1).getReg();
unsigned Flags = MI.getFlags();
- auto ScaleConstant = B.buildFConstant(F64, 0x1.0p-767);
+ Register SqrtX = X;
+ Register Scaling, ZeroInt;
+ if (!MI.getFlag(MachineInstr::FmAfn)) {
+ auto ScaleConstant = B.buildFConstant(F64, 0x1.0p-767);
- auto ZeroInt = B.buildConstant(S32, 0);
- auto Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, S1, X, ScaleConstant);
+ ZeroInt = B.buildConstant(S32, 0).getReg(0);
+ Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, S1, X, ScaleConstant).getReg(0);
- // Scale up input if it is too small.
- auto ScaleUpFactor = B.buildConstant(S32, 256);
- auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
- auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
+ // Scale up input if it is too small.
+ auto ScaleUpFactor = B.buildConstant(S32, 256);
+ auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
+ SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags).getReg(0);
+ }
- auto SqrtY =
- B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX.getReg(0));
+ auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX);
auto Half = B.buildFConstant(F64, 0.5);
auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
@@ -5884,15 +5887,17 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
auto SqrtS2 = B.buildFMA(F64, SqrtD0, SqrtH1, SqrtS1);
- auto NegSqrtS2 = B.buildFNeg(F64, SqrtS2);
- auto SqrtD1 = B.buildFMA(F64, NegSqrtS2, SqrtS2, SqrtX);
-
- auto SqrtRet = B.buildFMA(F64, SqrtD1, SqrtH1, SqrtS2);
+ Register SqrtRet = SqrtS2.getReg(0);
+ if (!MI.getFlag(MachineInstr::FmAfn)) {
+ auto NegSqrtS2 = B.buildFNeg(F64, SqrtS2);
+ auto SqrtD1 = B.buildFMA(F64, NegSqrtS2, SqrtS2, SqrtX);
+ auto SqrtD2 = B.buildFMA(F64, SqrtD1, SqrtH1, SqrtS2);
- // Scale down the result.
- auto ScaleDownFactor = B.buildConstant(S32, -128);
- auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
- SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags);
+ // Scale down the result.
+ auto ScaleDownFactor = B.buildConstant(S32, -128);
+ auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
+ SqrtRet = B.buildFLdexp(F64, SqrtD2, ScaleDown, Flags).getReg(0);
+ }
Register IsZeroOrInf;
if (MI.getFlag(MachineInstr::FmNoInfs)) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1e0ba25158ff4..cd8575751220b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13182,17 +13182,20 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue X = Op.getOperand(0);
- SDValue ScaleConstant = DAG.getConstantFP(0x1.0p-767, DL, MVT::f64);
-
- SDValue Scaling = DAG.getSetCC(DL, MVT::i1, X, ScaleConstant, ISD::SETOLT);
-
SDValue ZeroInt = DAG.getConstant(0, DL, MVT::i32);
- // Scale up input if it is too small.
- SDValue ScaleUpFactor = DAG.getConstant(256, DL, MVT::i32);
- SDValue ScaleUp =
- DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleUpFactor, ZeroInt);
- SDValue SqrtX = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, X, ScaleUp, Flags);
+ SDValue SqrtX = X;
+ SDValue Scaling;
+ if (!Flags.hasApproximateFuncs()) {
+ SDValue ScaleConstant = DAG.getConstantFP(0x1.0p-767, DL, MVT::f64);
+ Scaling = DAG.getSetCC(DL, MVT::i1, X, ScaleConstant, ISD::SETOLT);
+
+ // Scale up input if it is too small.
+ SDValue ScaleUpFactor = DAG.getConstant(256, DL, MVT::i32);
+ SDValue ScaleUp =
+ DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleUpFactor, ZeroInt);
+ SqrtX = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, X, ScaleUp, Flags);
+ }
SDValue SqrtY = DAG.getNode(AMDGPUISD::RSQ, DL, MVT::f64, SqrtX);
@@ -13214,16 +13217,19 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
SDValue SqrtS2 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD0, SqrtH1, SqrtS1);
- SDValue NegSqrtS2 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS2);
- SDValue SqrtD1 =
- DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS2, SqrtS2, SqrtX);
+ SDValue SqrtRet = SqrtS2;
+ if (!Flags.hasApproximateFuncs()) {
+ SDValue NegSqrtS2 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS2);
+ SDValue SqrtD1 =
+ DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS2, SqrtS2, SqrtX);
- SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
+ SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
- SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32);
- SDValue ScaleDown =
- DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt);
- SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
+ SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32);
+ SDValue ScaleDown = DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling,
+ ScaleDownFactor, ZeroInt);
+ SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
+ }
// TODO: Check for DAZ and expand to subnormals
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 7cdf08800cb25..cc120f8d595c4 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1066,116 +1066,80 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
; GFX6-SDAG-LABEL: s_sqrt_f64_afn:
; GFX6-SDAG: ; %bb.0:
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GFX6-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX6-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX6-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v6, 0x260
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v7, s1
+; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX6-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-SDAG-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-SDAG-NEXT: v_readfirstlane_b32 s1, v1
; GFX6-SDAG-NEXT: ; return to shader part epilog
;
; GFX8-SDAG-LABEL: s_sqrt_f64_afn:
; GFX8-SDAG: ; %bb.0:
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GFX8-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX8-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX8-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
-; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 0x260
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v7, s1
+; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-SDAG-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-SDAG-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-SDAG-NEXT: ; return to shader part epilog
;
; GFX6-GISEL-LABEL: s_sqrt_f64_afn:
; GFX6-GISEL: ; %bb.0:
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v6, 0x260
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v7, s0
+; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX6-GISEL-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_sqrt_f64_afn:
; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x260
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v7, s0
+; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
@@ -1193,112 +1157,76 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX6-SDAG-LABEL: s_sqrt_f64_afn_nnan_ninf:
; GFX6-SDAG: ; %bb.0:
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GFX6-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX6-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX6-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v6, s1
+; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX6-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-SDAG-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-SDAG-NEXT: v_readfirstlane_b32 s1, v1
; GFX6-SDAG-NEXT: ; return to shader part epilog
;
; GFX8-SDAG-LABEL: s_sqrt_f64_afn_nnan_ninf:
; GFX8-SDAG: ; %bb.0:
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GFX8-SDAG-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX8-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX8-SDAG-NEXT: s_cselect_b32 s2, 0x100, 0
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, s1
+; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], s[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-SDAG-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-SDAG-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-SDAG-NEXT: ; return to shader part epilog
;
; GFX6-GISEL-LABEL: s_sqrt_f64_afn_nnan_ninf:
; GFX6-GISEL: ; %bb.0:
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v6, s0
+; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX6-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX6-GISEL-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_sqrt_f64_afn_nnan_ninf:
; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e64 vcc, s[0:1], 0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, s0
+; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; GFX8-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
@@ -1657,27 +1585,16 @@ define double @v_sqrt_f64_afn(double %x) {
; GFX6-SDAG-LABEL: v_sqrt_f64_afn:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -1685,27 +1602,16 @@ define double @v_sqrt_f64_afn(double %x) {
; GFX8-SDAG-LABEL: v_sqrt_f64_afn:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -1713,13 +1619,9 @@ define double @v_sqrt_f64_afn(double %x) {
; GFX6-GISEL-LABEL: v_sqrt_f64_afn:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1727,13 +1629,6 @@ define double @v_sqrt_f64_afn(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1741,13 +1636,9 @@ define double @v_sqrt_f64_afn(double %x) {
; GFX8-GISEL-LABEL: v_sqrt_f64_afn:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1755,13 +1646,6 @@ define double @v_sqrt_f64_afn(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1773,27 +1657,16 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nsz:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -1801,27 +1674,16 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nsz:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -1829,13 +1691,9 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nsz:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1843,13 +1701,6 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1857,13 +1708,9 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nsz:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -1871,13 +1718,6 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1889,17 +1729,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX6-SDAG-LABEL: v_sqrt_v2f64_afn:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v16, 0x260
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v16
; GFX6-SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -1912,20 +1746,8 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GFX6-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX6-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX6-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -1935,17 +1757,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX8-SDAG-LABEL: v_sqrt_v2f64_afn:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v16, 0x260
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v16
; GFX8-SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -1958,20 +1774,8 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GFX8-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX8-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX8-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -1981,18 +1785,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX6-GISEL-LABEL: v_sqrt_v2f64_afn:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v16, 0x260
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v16
; GFX6-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GFX6-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2007,18 +1804,6 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX6-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2028,18 +1813,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX8-GISEL-LABEL: v_sqrt_v2f64_afn:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v16, 0x260
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v16
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v16
; GFX8-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GFX8-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2054,18 +1832,6 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; GFX8-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2079,27 +1845,16 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nnan:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2107,27 +1862,16 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nnan:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2135,13 +1879,9 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nnan:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2149,13 +1889,6 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2163,13 +1896,9 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nnan:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0x260
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2177,13 +1906,6 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2191,71 +1913,44 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
ret double %result
}
-define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
-; GFX6-SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
+define double @v_sqrt_f64_afn_ninf(double %x) {
+; GFX6-SDAG-LABEL: v_sqrt_f64_afn_ninf:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX8-SDAG-LABEL: v_sqrt_f64_afn_ninf:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX6-GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX6-GISEL-LABEL: v_sqrt_f64_afn_ninf:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2263,26 +1958,15 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX8-GISEL-LABEL: v_sqrt_f64_afn_ninf:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2290,12 +1974,78 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn ninf double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
+; GFX6-SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
+; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], |v[0:1]|, v[2:3]
+; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], |v[0:1]|
+; GFX6-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
+; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], |v[0:1]|, v[2:3]
+; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], |v[0:1]|
+; GFX8-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
+; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], |v[0:1]|
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_rsq_f64_e64 v[2:3], |v[0:1]|
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, 0
+; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], |v[0:1]|
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2308,26 +2058,15 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2335,26 +2074,15 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2362,13 +2090,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2376,12 +2099,6 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2389,13 +2106,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2403,12 +2115,6 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2420,17 +2126,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-SDAG-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -2443,19 +2142,8 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GFX6-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX6-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2465,17 +2153,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-SDAG-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
@@ -2488,19 +2169,8 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GFX8-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
-; GFX8-SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2510,18 +2180,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX6-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GFX6-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2536,17 +2198,6 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2556,18 +2207,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX8-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GFX8-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
@@ -2582,17 +2225,6 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -2606,26 +2238,15 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX6-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2633,26 +2254,15 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX8-SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
-; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
-; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2660,13 +2270,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX6-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2674,12 +2279,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2687,13 +2286,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX8-GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -2701,12 +2295,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 750ab22aa9a55..238cff87f2d5f 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -3567,15 +3567,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
@@ -3583,12 +3577,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -3610,15 +3599,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -3627,11 +3610,6 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -3653,27 +3631,16 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; VI-SDAG-CG-LABEL: v_rsq_f64__afn_sqrt:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -3692,13 +3659,9 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; VI-GISEL-CG-LABEL: v_rsq_f64__afn_sqrt:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3706,13 +3669,6 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@@ -4017,27 +3973,16 @@ define double @v_rsq_f64__afn(double %x) {
; SI-SDAG-CG-LABEL: v_rsq_f64__afn:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4052,15 +3997,9 @@ define double @v_rsq_f64__afn(double %x) {
; SI-GISEL-CG-LABEL: v_rsq_f64__afn:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4068,11 +4007,6 @@ define double @v_rsq_f64__afn(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4087,27 +4021,16 @@ define double @v_rsq_f64__afn(double %x) {
; VI-SDAG-CG-LABEL: v_rsq_f64__afn:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4122,13 +4045,9 @@ define double @v_rsq_f64__afn(double %x) {
; VI-GISEL-CG-LABEL: v_rsq_f64__afn:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4136,13 +4055,6 @@ define double @v_rsq_f64__afn(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4230,27 +4142,16 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4266,15 +4167,9 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4282,11 +4177,6 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4301,27 +4191,16 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4337,13 +4216,9 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4351,13 +4226,6 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4429,26 +4297,15 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4463,14 +4320,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4478,11 +4329,6 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4497,26 +4343,15 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-SDAG-CG-LABEL: v_rsq_f64__afn_ninf:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4531,13 +4366,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-GISEL-CG-LABEL: v_rsq_f64__afn_ninf:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4545,12 +4375,6 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4638,27 +4462,16 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4673,15 +4486,9 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4689,11 +4496,6 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4708,27 +4510,16 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4743,13 +4534,9 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4757,13 +4544,6 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4835,26 +4615,15 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4869,14 +4638,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4884,11 +4647,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4903,26 +4661,15 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-SDAG-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -4937,13 +4684,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-CG-LABEL: v_rsq_f64__afn_nnan_ninf:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4951,12 +4693,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5028,26 +4764,15 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5063,14 +4788,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -5078,11 +4797,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5097,26 +4811,15 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-SDAG-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5132,13 +4835,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-CG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -5146,12 +4844,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -5464,44 +5156,26 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v12
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
+; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[4:5]
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v15, 0, v14, vcc
-; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
-; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 0.5
; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[8:9], 0.5
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[8:9], v[12:13], 0.5
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v15
-; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], v[12:13], v[6:7], v[12:13]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[8:9], v[6:7], v[8:9]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[4:5]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[6:7]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[12:13], v[10:11], v[12:13]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[12:13], v[6:7], v[10:11]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[6:7]
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
@@ -5523,45 +5197,26 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v5, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[4:5]
-; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
-; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; SI-GISEL-CG-NEXT: v_mul_f64 v[12:13], v[10:11], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[10:11], v[2:3], v[10:11]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[12:13], v[10:11], 0.5
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[10:11]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[12:13], v[6:7], v[12:13]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
@@ -5583,17 +5238,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-SDAG-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[2:3], v[4:5]
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_mul_f64 v[10:11], v[0:1], v[6:7]
@@ -5606,19 +5254,8 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; VI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
@@ -5642,18 +5279,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-CG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v5, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
+; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; VI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; VI-GISEL-CG-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
@@ -5668,17 +5297,6 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
-; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
@@ -5786,29 +5404,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
;
; SI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
; SI-SDAG-CG: ; %bb.0:
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
-; SI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
-; SI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
-; SI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
-; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v6, 0x260
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v7, s1
+; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], s[0:1], v[0:1]
+; SI-SDAG-CG-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, s0
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; SI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -5822,29 +5431,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
;
; SI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
; SI-GISEL-CG: ; %bb.0:
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
-; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v6, 0x260
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v7, s0
+; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], 0.5
+; SI-GISEL-CG-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, s1
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; SI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -5858,29 +5458,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
;
; VI-SDAG-CG-LABEL: s_rsq_f64_unsafe:
; VI-SDAG-CG: ; %bb.0:
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, 0
-; VI-SDAG-CG-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-SDAG-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-CG-NEXT: s_and_b64 s[2:3], vcc, exec
-; VI-SDAG-CG-NEXT: s_cselect_b32 s2, 0x100, 0
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v0, s2
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-CG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
-; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
-; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0x260
-; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v6, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v7, s1
+; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], s[0:1], v[0:1]
+; VI-SDAG-CG-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 0.5
+; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[2:3], s[0:1]
+; VI-SDAG-CG-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -5894,29 +5485,20 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) {
;
; VI-GISEL-CG-LABEL: s_rsq_f64_unsafe:
; VI-GISEL-CG: ; %bb.0:
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v0, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v1, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v6, 0x260
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, s[0:1], v6
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v7, s0
+; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], 0.5
+; VI-GISEL-CG-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 0.5
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[0:1], s[0:1]
+; VI-GISEL-CG-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, s1
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
@@ -6011,27 +5593,16 @@ define double @v_rsq_f64_unsafe(double %x) {
; SI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
; SI-SDAG-CG: ; %bb.0:
; SI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; SI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -6046,15 +5617,9 @@ define double @v_rsq_f64_unsafe(double %x) {
; SI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
; SI-GISEL-CG: ; %bb.0:
; SI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; SI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; SI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -6062,11 +5627,6 @@ define double @v_rsq_f64_unsafe(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
-; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -6081,27 +5641,16 @@ define double @v_rsq_f64_unsafe(double %x) {
; VI-SDAG-CG-LABEL: v_rsq_f64_unsafe:
; VI-SDAG-CG: ; %bb.0:
; VI-SDAG-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-CG-NEXT: s_mov_b32 s4, 0
-; VI-SDAG-CG-NEXT: s_brev_b32 s5, 8
-; VI-SDAG-CG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; VI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@@ -6116,13 +5665,9 @@ define double @v_rsq_f64_unsafe(double %x) {
; VI-GISEL-CG-LABEL: v_rsq_f64_unsafe:
; VI-GISEL-CG: ; %bb.0:
; VI-GISEL-CG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v2, 0
-; VI-GISEL-CG-NEXT: v_bfrev_b32_e32 v3, 8
-; VI-GISEL-CG-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0x260
+; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; VI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; VI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -6130,13 +5675,6 @@ define double @v_rsq_f64_unsafe(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
-; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
More information about the llvm-branch-commits
mailing list