[llvm] 47b3ada - AMDGPU: Add more sqrt f64 lowering tests

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 25 04:54:18 PDT 2023


Author: Matt Arsenault
Date: 2023-07-25T07:54:11-04:00
New Revision: 47b3ada432f8afee9723a4b3d27b3efbef34dedf

URL: https://github.com/llvm/llvm-project/commit/47b3ada432f8afee9723a4b3d27b3efbef34dedf
DIFF: https://github.com/llvm/llvm-project/commit/47b3ada432f8afee9723a4b3d27b3efbef34dedf.diff

LOG: AMDGPU: Add more sqrt f64 lowering tests

Almost all permutations of the flags are potentially relevant.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
    llvm/test/CodeGen/AMDGPU/rsq.f64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index dbf38717c5c123..62393930d92e2b 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s
 
-; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s
 
 define double @v_sqrt_f64(double %x) {
 ; GCN-LABEL: v_sqrt_f64:
@@ -115,9 +115,219 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
   ret <2 x i32> %insert.1
 }
 
+define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
+; GCN-LABEL: s_sqrt_f64_afn:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_readfirstlane_b32 s0, v0
+; GCN-NEXT:    v_readfirstlane_b32 s1, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %result = call afn double @llvm.sqrt.f64(double %x)
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
+; GCN-LABEL: s_sqrt_f64_afn_nnan_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_readfirstlane_b32 s0, v0
+; GCN-NEXT:    v_readfirstlane_b32 s1, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %result = call afn nnan ninf double @llvm.sqrt.f64(double %x)
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define double @v_sqrt_f64_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_nsz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_nnan_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_nnan_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nnan ninf double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_nnan_ninf_nsz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nnan ninf nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_afn(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_afn_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nsz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
+; GCN-LABEL: v_sqrt_v2f64_afn:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  ret <2 x double> %result
+}
+
+define double @v_sqrt_f64_afn_nnan(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nnan:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn nnan double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %fabs = call double @llvm.fabs.f64(double %x)
+  %result = call afn ninf double @llvm.sqrt.f64(double %fabs)
+  ret double %result
+}
+
+define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nnan_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn nnan ninf double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
+; GCN-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  ret <2 x double> %result
+}
+
+define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call afn nnan ninf nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
+; GCN-LABEL: v_sqrt_f64__approx_func_fp_math:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
+; GCN-LABEL: v_sqrt_f64__enough_unsafe_attrs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64__unsafe_attr(double %x) #4 {
+; GCN-LABEL: v_sqrt_f64__unsafe_attr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nsz double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
+; GCN-LABEL: v_sqrt_v2f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  ret <2 x double> %result
+}
+
+define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
+; GCN-LABEL: v_sqrt_v3f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT:    v_sqrt_f64_e32 v[4:5], v[4:5]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <3 x double> @llvm.sqrt.v3f64(<3 x double> %x)
+  ret <3 x double> %result
+}
+
 declare double @llvm.fabs.f64(double) #0
 declare double @llvm.sqrt.f64(double) #0
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
+declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) #0
 declare i32 @llvm.amdgcn.readfirstlane(i32) #1
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #1 = { convergent nounwind willreturn memory(none) }
+attributes #2 = { "approx-func-fp-math"="true" }
+attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
+attributes #4 = { "unsafe-fp-math"="true" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 93a88692826768..a20aaac1598c32 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -1,302 +1,2516 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN,GCN-UNSAFE,SI,SI-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
-
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=hawaii -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN,GCN-UNSAFE,CI,CI-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GCN-SAFE,CI,CI-SAFE %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-
-define amdgpu_kernel void @rsq_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; GCN-UNSAFE-LABEL: rsq_f64:
-; GCN-UNSAFE:       ; %bb.0:
-; GCN-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
-; GCN-UNSAFE-NEXT:    s_mov_b32 s6, -1
-; GCN-UNSAFE-NEXT:    s_mov_b32 s10, s6
-; GCN-UNSAFE-NEXT:    s_mov_b32 s11, s7
-; GCN-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-UNSAFE-NEXT:    s_mov_b32 s8, s2
-; GCN-UNSAFE-NEXT:    s_mov_b32 s9, s3
-; GCN-UNSAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; GCN-UNSAFE-NEXT:    s_mov_b32 s4, s0
-; GCN-UNSAFE-NEXT:    s_mov_b32 s5, s1
-; GCN-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
-; GCN-UNSAFE-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; GCN-UNSAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GCN-UNSAFE-NEXT:    s_endpgm
-;
-; SI-SAFE-LABEL: rsq_f64:
-; SI-SAFE:       ; %bb.0:
-; SI-SAFE-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
-; SI-SAFE-NEXT:    s_mov_b32 s2, -1
-; SI-SAFE-NEXT:    s_mov_b32 s10, s2
-; SI-SAFE-NEXT:    s_mov_b32 s11, s3
-; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT:    s_mov_b32 s8, s6
-; SI-SAFE-NEXT:    s_mov_b32 s9, s7
-; SI-SAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; SI-SAFE-NEXT:    s_waitcnt vmcnt(0)
-; SI-SAFE-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; SI-SAFE-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; SI-SAFE-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SAFE-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
-; SI-SAFE-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT:    s_mov_b32 s0, 0x3ff00000
-; SI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, v7
-; SI-SAFE-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SAFE-NEXT:    s_xor_b64 vcc, s[0:1], vcc
-; SI-SAFE-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SAFE-NEXT:    s_mov_b32 s0, s4
-; SI-SAFE-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SAFE-NEXT:    s_mov_b32 s1, s5
-; SI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-SAFE-NEXT:    s_endpgm
-;
-; CI-SAFE-LABEL: rsq_f64:
-; CI-SAFE:       ; %bb.0:
-; CI-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-SAFE-NEXT:    s_mov_b32 s7, 0xf000
-; CI-SAFE-NEXT:    s_mov_b32 s6, -1
-; CI-SAFE-NEXT:    s_mov_b32 s10, s6
-; CI-SAFE-NEXT:    s_mov_b32 s11, s7
-; CI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-SAFE-NEXT:    s_mov_b32 s8, s2
-; CI-SAFE-NEXT:    s_mov_b32 s9, s3
-; CI-SAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; CI-SAFE-NEXT:    s_mov_b32 s4, s0
-; CI-SAFE-NEXT:    s_mov_b32 s5, s1
-; CI-SAFE-NEXT:    s_waitcnt vmcnt(0)
-; CI-SAFE-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; CI-SAFE-NEXT:    v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], 1.0
-; CI-SAFE-NEXT:    v_div_scale_f64 v[8:9], vcc, 1.0, v[0:1], 1.0
-; CI-SAFE-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; CI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; CI-SAFE-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; CI-SAFE-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; CI-SAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; CI-SAFE-NEXT:    s_endpgm
-  %val = load double, ptr addrspace(1) %in, align 4
-  %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
-  %div = fdiv double 1.0, %sqrt
-  store double %div, ptr addrspace(1) %out, align 4
-  ret void
-}
-
-define amdgpu_kernel void @neg_rsq_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; GCN-UNSAFE-LABEL: neg_rsq_f64:
-; GCN-UNSAFE:       ; %bb.0:
-; GCN-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
-; GCN-UNSAFE-NEXT:    s_mov_b32 s6, -1
-; GCN-UNSAFE-NEXT:    s_mov_b32 s10, s6
-; GCN-UNSAFE-NEXT:    s_mov_b32 s11, s7
-; GCN-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-UNSAFE-NEXT:    s_mov_b32 s8, s2
-; GCN-UNSAFE-NEXT:    s_mov_b32 s9, s3
-; GCN-UNSAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; GCN-UNSAFE-NEXT:    s_mov_b32 s4, s0
-; GCN-UNSAFE-NEXT:    s_mov_b32 s5, s1
-; GCN-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
-; GCN-UNSAFE-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; GCN-UNSAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GCN-UNSAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GCN-UNSAFE-NEXT:    s_endpgm
-;
-; SI-SAFE-LABEL: neg_rsq_f64:
-; SI-SAFE:       ; %bb.0:
-; SI-SAFE-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
-; SI-SAFE-NEXT:    s_mov_b32 s2, -1
-; SI-SAFE-NEXT:    s_mov_b32 s10, s2
-; SI-SAFE-NEXT:    s_mov_b32 s11, s3
-; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT:    s_mov_b32 s8, s6
-; SI-SAFE-NEXT:    s_mov_b32 s9, s7
-; SI-SAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; SI-SAFE-NEXT:    s_waitcnt vmcnt(0)
-; SI-SAFE-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; SI-SAFE-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SAFE-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SAFE-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SAFE-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT:    s_mov_b32 s0, 0xbff00000
-; SI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, v7
-; SI-SAFE-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SAFE-NEXT:    s_xor_b64 vcc, s[0:1], vcc
-; SI-SAFE-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SAFE-NEXT:    s_mov_b32 s0, s4
-; SI-SAFE-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SAFE-NEXT:    s_mov_b32 s1, s5
-; SI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-SAFE-NEXT:    s_endpgm
-;
-; CI-SAFE-LABEL: neg_rsq_f64:
-; CI-SAFE:       ; %bb.0:
-; CI-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-SAFE-NEXT:    s_mov_b32 s7, 0xf000
-; CI-SAFE-NEXT:    s_mov_b32 s6, -1
-; CI-SAFE-NEXT:    s_mov_b32 s10, s6
-; CI-SAFE-NEXT:    s_mov_b32 s11, s7
-; CI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-SAFE-NEXT:    s_mov_b32 s8, s2
-; CI-SAFE-NEXT:    s_mov_b32 s9, s3
-; CI-SAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; CI-SAFE-NEXT:    s_mov_b32 s4, s0
-; CI-SAFE-NEXT:    s_mov_b32 s5, s1
-; CI-SAFE-NEXT:    s_waitcnt vmcnt(0)
-; CI-SAFE-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
-; CI-SAFE-NEXT:    v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], -1.0
-; CI-SAFE-NEXT:    v_div_scale_f64 v[8:9], vcc, -1.0, v[0:1], -1.0
-; CI-SAFE-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; CI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; CI-SAFE-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; CI-SAFE-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; CI-SAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; CI-SAFE-NEXT:    s_endpgm
-  %val = load double, ptr addrspace(1) %in, align 4
-  %sqrt = call double @llvm.sqrt.f64(double %val)
-  %div = fdiv double -1.0, %sqrt
-  store double %div, ptr addrspace(1) %out, align 4
-  ret void
-}
-
-define amdgpu_kernel void @neg_rsq_neg_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; GCN-UNSAFE-LABEL: neg_rsq_neg_f64:
-; GCN-UNSAFE:       ; %bb.0:
-; GCN-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
-; GCN-UNSAFE-NEXT:    s_mov_b32 s6, -1
-; GCN-UNSAFE-NEXT:    s_mov_b32 s10, s6
-; GCN-UNSAFE-NEXT:    s_mov_b32 s11, s7
-; GCN-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-UNSAFE-NEXT:    s_mov_b32 s8, s2
-; GCN-UNSAFE-NEXT:    s_mov_b32 s9, s3
-; GCN-UNSAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; GCN-UNSAFE-NEXT:    s_mov_b32 s4, s0
-; GCN-UNSAFE-NEXT:    s_mov_b32 s5, s1
-; GCN-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
-; GCN-UNSAFE-NEXT:    v_sqrt_f64_e64 v[0:1], -v[0:1]
-; GCN-UNSAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; GCN-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GCN-UNSAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GCN-UNSAFE-NEXT:    s_endpgm
-;
-; SI-SAFE-LABEL: neg_rsq_neg_f64:
-; SI-SAFE:       ; %bb.0:
-; SI-SAFE-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
-; SI-SAFE-NEXT:    s_mov_b32 s2, -1
-; SI-SAFE-NEXT:    s_mov_b32 s10, s2
-; SI-SAFE-NEXT:    s_mov_b32 s11, s3
-; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT:    s_mov_b32 s8, s6
-; SI-SAFE-NEXT:    s_mov_b32 s9, s7
-; SI-SAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; SI-SAFE-NEXT:    s_waitcnt vmcnt(0)
-; SI-SAFE-NEXT:    v_sqrt_f64_e64 v[0:1], -v[0:1]
-; SI-SAFE-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SAFE-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SAFE-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SAFE-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT:    s_mov_b32 s0, 0xbff00000
-; SI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, v7
-; SI-SAFE-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SAFE-NEXT:    s_xor_b64 vcc, s[0:1], vcc
-; SI-SAFE-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SAFE-NEXT:    s_mov_b32 s0, s4
-; SI-SAFE-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SAFE-NEXT:    s_mov_b32 s1, s5
-; SI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-SAFE-NEXT:    s_endpgm
-;
-; CI-SAFE-LABEL: neg_rsq_neg_f64:
-; CI-SAFE:       ; %bb.0:
-; CI-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-SAFE-NEXT:    s_mov_b32 s7, 0xf000
-; CI-SAFE-NEXT:    s_mov_b32 s6, -1
-; CI-SAFE-NEXT:    s_mov_b32 s10, s6
-; CI-SAFE-NEXT:    s_mov_b32 s11, s7
-; CI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-SAFE-NEXT:    s_mov_b32 s8, s2
-; CI-SAFE-NEXT:    s_mov_b32 s9, s3
-; CI-SAFE-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; CI-SAFE-NEXT:    s_mov_b32 s4, s0
-; CI-SAFE-NEXT:    s_mov_b32 s5, s1
-; CI-SAFE-NEXT:    s_waitcnt vmcnt(0)
-; CI-SAFE-NEXT:    v_sqrt_f64_e64 v[0:1], -v[0:1]
-; CI-SAFE-NEXT:    v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], -1.0
-; CI-SAFE-NEXT:    v_div_scale_f64 v[8:9], vcc, -1.0, v[0:1], -1.0
-; CI-SAFE-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
-; CI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
-; CI-SAFE-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; CI-SAFE-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; CI-SAFE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; CI-SAFE-NEXT:    s_endpgm
-  %val = load double, ptr addrspace(1) %in, align 4
-  %val.fneg = fneg double %val
-  %sqrt = call double @llvm.sqrt.f64(double %val.fneg)
-  %div = fdiv double -1.0, %sqrt
-  store double %div, ptr addrspace(1) %out, align 4
-  ret void
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,SI-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,SI-GISEL %s
+
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG,VI-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL,VI-GISEL %s
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare double @llvm.amdgcn.sqrt.f64(double)
+declare double @llvm.fabs.f64(double)
+
+define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
+; SI-SDAG-LABEL: s_rsq_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_rsq_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_rsq_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_rsq_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT:    ; return to shader part epilog
+  %rsq = call contract double @llvm.sqrt.f64(double %x)
+  %result = fdiv contract double 1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
+; Test: contract 1.0 / sqrt(fabs(x)) with a uniform (inreg) input; the fabs
+; folds into the sqrt source modifier (|s[0:1]|) in all four run configs.
+; SI-SDAG-LABEL: s_rsq_f64_fabs:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_rsq_f64_fabs:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_rsq_f64_fabs:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_rsq_f64_fabs:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT:    ; return to shader part epilog
+  %fabs.x = call double @llvm.fabs.f64(double %x)
+  %rsq = call contract double @llvm.sqrt.f64(double %fabs.x)
+  %result = fdiv contract double 1.0, %rsq
+; Split the f64 result into 2 x i32 and make it uniform via readfirstlane so
+; the amdgpu_ps shader can return it in SGPRs.
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
+; Test: contract -1.0 / sqrt(x) with a uniform (inreg) input; the -1.0
+; numerator shows up as the inline constant of v_div_scale/v_div_fixup
+; (high word 0xbff00000 in the SI compare).
+; SI-SDAG-LABEL: s_neg_rsq_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s2, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_neg_rsq_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_neg_rsq_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_neg_rsq_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT:    ; return to shader part epilog
+  %rsq = call contract double @llvm.sqrt.f64(double %x)
+  %result = fdiv contract double -1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
+; Test: contract -1.0 / sqrt(-x) with a uniform (inreg) input; the fneg
+; folds into the sqrt source modifier (-s[0:1]) in all four run configs.
+; SI-SDAG-LABEL: s_neg_rsq_neg_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], -s[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s2, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_neg_rsq_neg_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], -s[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_neg_rsq_neg_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], -s[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_neg_rsq_neg_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], -s[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT:    ; return to shader part epilog
+  %x.neg = fneg double %x
+  %rsq = call contract double @llvm.sqrt.f64(double %x.neg)
+  %result = fdiv contract double -1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define double @v_rsq_f64(double %x) {
+; Test: baseline contract 1.0 / sqrt(x) with a divergent (VGPR) input.
+; SI-SDAG-LABEL: v_rsq_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_fabs(double %x) {
+; Test: contract 1.0 / sqrt(fabs(x)) with a divergent (VGPR) input; the fabs
+; folds into the sqrt source modifier (|v[0:1]|).
+; SI-SDAG-LABEL: v_rsq_f64_fabs:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_fabs:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_fabs:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_fabs:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.x = call double @llvm.fabs.f64(double %x)
+  %sqrt = call contract double @llvm.sqrt.f64(double %fabs.x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_missing_contract0(double %x) {
+; Test: flag-permutation case — only the fdiv carries the contract flag; the
+; sqrt call has no fast-math flags.
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract0:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract0:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_missing_contract1(double %x) {
+; Test: flag-permutation case — only the sqrt carries the contract flag; the
+; fdiv has no fast-math flags.
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract1:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract1:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_neg_rsq_f64(double %x) {
+; Test: contract -1.0 / sqrt(x) with a divergent (VGPR) input; -1.0 appears
+; as the inline constant in v_div_scale/v_div_fixup.
+; SI-SDAG-LABEL: v_neg_rsq_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double -1.0, %sqrt
+  ret double %rsq
+}
+
+; rsq of a <2 x double> via a plain (no fast-math flags) fdiv of a +1.0
+; splat by sqrt(x).  The autogenerated checks expect v_sqrt_f64 followed by
+; the full v_div_scale/v_rcp/v_fma/v_div_fmas/v_div_fixup f64 division
+; expansion for each lane; no v_rsq_f64 appears.  On SI the div_scale
+; scc result is rebuilt with v_cmp/s_xor into vcc, while on VI div_scale
+; writes vcc directly (NOTE(review): autogenerated lines — regenerate with
+; update_llc_test_checks.py rather than editing by hand).
+define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_rsq_v2f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[16:17], v[12:13]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[16:17]
+; SI-SDAG-NEXT:    v_mul_f64 v[12:13], v[18:19], v[10:11]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v9
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[8:9], v[12:13], v[18:19]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[12:13]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_v2f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v20, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_v2f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_v2f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
+  ret <2 x double> %rsq
+}
+
+; Same as v_rsq_v2f64 but with a -1.0 splat numerator (no fast-math flags).
+; The -1.0 constant flows into the div_scale/div_fixup operands and the SI
+; checks use the 0xbff00000 (-1.0 high word) immediate for the vcc rebuild;
+; otherwise the per-lane v_div_scale/v_rcp/v_fma expansion is identical.
+; NOTE(review): autogenerated FileCheck lines — regenerate with
+; update_llc_test_checks.py rather than editing by hand.
+define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[16:17], v[12:13]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[16:17]
+; SI-SDAG-NEXT:    v_mul_f64 v[12:13], v[18:19], v[10:11]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v9
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[8:9], v[12:13], v[18:19]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[12:13]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_v2f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v20, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], -1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_v2f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_v2f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double -1.0, double -1.0>, %sqrt
+  ret <2 x double> %rsq
+}
+
+; Variant with a poison numerator in lane 1.  The SDAG checks only perform
+; the lane-0 division and materialize lane 1 as a constant (v_mov_b32 of 0
+; and 0x7ff80000); the GISEL checks still emit both divisions, with the
+; poison operand printed as s[4:5] in f64 operand positions — presumably an
+; artifact of how GlobalISel imports poison (NOTE(review): confirm against
+; the generator; these lines are autogenerated by update_llc_test_checks.py).
+define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v16, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], s[4:5]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v16, v13
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v19
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], s[4:5]
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double -1.0, double poison>, %sqrt
+  ret <2 x double> %rsq
+}
+
+; Mixed-sign numerator <-1.0, 1.0>: lane 0 divides -1.0 and lane 1 divides
+; 1.0 by the sqrt, so both constants appear in the div_scale/div_fixup
+; operands and both 0xbff00000 and 0x3ff00000 immediates show up in the SI
+; vcc-rebuild compares.  NOTE(review): autogenerated FileCheck lines —
+; regenerate with update_llc_test_checks.py rather than editing by hand.
+define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[16:17], v[12:13]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3ff00000
+; SI-SDAG-NEXT:    v_mul_f64 v[12:13], v[18:19], v[10:11]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[16:17]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[8:9], v[12:13], v[18:19]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v9
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s4, v19
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT:    s_nop 0
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[12:13]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v16, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v16, v13
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0x3ff00000
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], -1.0
+; SI-GISEL-NEXT:    s_nop 1
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-SDAG-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-SDAG-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT:    s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv <2 x double> <double -1.0, double 1.0>, %sqrt
+  ret <2 x double> %rsq
+}
+
+define double @v_rsq_f64_fneg_fabs(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %fabs = call double @llvm.fabs.f64(double %x)
+  %fneg.fabs = fneg double %fabs
+  %sqrt = call contract double @llvm.sqrt.f64(double %fneg.fabs)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_sqrt(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_fdiv(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_fdiv:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_fdiv:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn(double %x) {
+; SDAG-LABEL: v_neg_rsq_f64__afn:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_neg_rsq_f64__afn:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], -1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], -1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn double -1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_ninf(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_ninf:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_ninf:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn ninf double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_nnan:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_nnan:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn nnan double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan_ninf(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_nnan_ninf:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
+; SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], -1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], -1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64__nnan_ninf(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract nnan ninf double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract nnan ninf double 1.0, %sqrt
+  ret double %rsq
+}
+
+define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
+; SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SDAG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SDAG-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SDAG-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], 1.0, v[0:1]
+; SI-GISEL-NEXT:    v_mul_f64 v[10:11], 1.0, v[2:3]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[10:11], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[8:9]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[10:11]
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT:    v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], 1.0, v[0:1]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], 1.0, v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[8:9]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[6:7], v[2:3], v[10:11]
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+  %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
+  ret <2 x double> %rsq
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
+; SDAG-LABEL: s_rsq_f64_unsafe:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: s_rsq_f64_unsafe:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[2:3], s[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[2:3]
+; GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; GISEL-NEXT:    ; return to shader part epilog
+  %rsq = call contract double @llvm.sqrt.f64(double %x)
+  %result = fdiv contract double 1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define double @v_rsq_f64_unsafe(double %x) #0 {
+; SDAG-LABEL: v_rsq_f64_unsafe:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64_unsafe:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_amdgcn_sqrt_f64(double %x) {
+; SI-SDAG-LABEL: v_rsq_amdgcn_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_amdgcn_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_amdgcn_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_amdgcn_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
 }
+
+define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
+; SI-SDAG-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
+  %rsq = fdiv contract double -1.0, %sqrt
+  ret double %rsq
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
+; SI-SDAG-LABEL: s_rsq_amdgcn_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_rsq_amdgcn_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_rsq_amdgcn_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_rsq_amdgcn_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT:    ; return to shader part epilog
+  %rsq = call contract double @llvm.amdgcn.sqrt.f64(double %x)
+  %result = fdiv contract double 1.0, %rsq
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define double @v_div_contract_sqrt_f64(double %x, double %y) {
+; SI-SDAG-LABEL: v_div_contract_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[0:1], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v9
+; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_div_contract_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_div_contract_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_div_contract_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %y)
+  %rsq = fdiv contract double %x, %sqrt
+  ret double %rsq
+}
+
+define double @v_div_arcp_sqrt_f64(double %x, double %y) {
+; SI-SDAG-LABEL: v_div_arcp_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[0:1], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v9
+; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_div_arcp_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_div_arcp_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_div_arcp_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call double @llvm.sqrt.f64(double %y)
+  %rsq = fdiv arcp double %x, %sqrt
+  ret double %rsq
+}
+
+define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
+; SI-SDAG-LABEL: v_div_contract_arcp_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[0:1], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v9
+; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_div_contract_arcp_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_div_contract_arcp_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_div_contract_arcp_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %y)
+  %rsq = fdiv contract arcp double %x, %sqrt
+  ret double %rsq
+}
+
+define double @v_div_const_contract_sqrt_f64(double %x) {
+; SI-SDAG-LABEL: v_div_const_contract_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x40700000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], s[6:7]
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], s[6:7], v[0:1], s[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[6:7]
+; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_div_const_contract_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT:    s_mov_b32 s6, 0
+; SI-GISEL-NEXT:    s_mov_b32 s7, 0x40700000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x40700000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], s[6:7]
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[6:7]
+; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_div_const_contract_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT:    s_mov_b32 s4, 0
+; VI-SDAG-NEXT:    s_mov_b32 s5, 0x40700000
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[6:7], v[0:1], v[0:1], s[4:5]
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, s[4:5], v[0:1], s[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[4:5]
+; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_div_const_contract_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT:    s_mov_b32 s4, 0
+; VI-GISEL-NEXT:    s_mov_b32 s5, 0x40700000
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[6:7], v[0:1], v[0:1], s[4:5]
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, s[4:5], v[0:1], s[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[4:5]
+; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 256.0, %sqrt
+  ret double %rsq
+}
+
+attributes #0 = { "unsafe-fp-math"="true" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CI: {{.*}}
-; CI-UNSAFE: {{.*}}
 ; GCN: {{.*}}
-; GCN-SAFE: {{.*}}
-; SI: {{.*}}
-; SI-UNSAFE: {{.*}}


        


More information about the llvm-commits mailing list