[llvm] b2d58b5 - AMDGPU: Expand rsq testing to cover contract flag

Thu Jul 20 16:51:20 PDT 2023

Author: Matt Arsenault
Date: 2023-07-20T19:51:15-04:00
New Revision: b2d58b596c999ab897420715686c9b85f9fc2ed0

URL: https://github.com/llvm/llvm-project/commit/b2d58b596c999ab897420715686c9b85f9fc2ed0
DIFF: https://github.com/llvm/llvm-project/commit/b2d58b596c999ab897420715686c9b85f9fc2ed0.diff

LOG: AMDGPU: Expand rsq testing to cover contract flag

The 1.0/sqrt(x) -> rsq(x) fold increases precision and probably needs
a contract flag.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
    llvm/test/CodeGen/AMDGPU/rsq.f32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index 3cd8037b5c4c1f..7dca0743df5135 100644

--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -818,27 +818,27 @@ define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(ptr
 define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
-; IEEE-NEXT:    [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[SQRT_X_NO_MD]]
+; IEEE-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
+; IEEE-NEXT:    [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
 ; IEEE-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
-; IEEE-NEXT:    [[MD_1ULP:%.*]] = fdiv float 1.000000e+00, [[SQRT_MD_1ULP]], !fpmath !2
+; IEEE-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; IEEE-NEXT:    [[MD_1ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_1ULP]], !fpmath !2
 ; IEEE-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; IEEE-NEXT:    [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
 ; IEEE-NEXT:    store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[MD_1ULP_MULTI_USE:%.*]] = fdiv float 1.000000e+00, [[SQRT_MD_1ULP_MULTI_USE]], !fpmath !2
+; IEEE-NEXT:    [[MD_1ULP_MULTI_USE:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_1ULP_MULTI_USE]], !fpmath !2
 ; IEEE-NEXT:    store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
-; IEEE-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_MD_25ULP]])
+; IEEE-NEXT:    [[SQRT_MD_25ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !0
+; IEEE-NEXT:    [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_MD_25ULP]])
 ; IEEE-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
-; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
+; IEEE-NEXT:    [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
+; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
 ; IEEE-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_AFN_NO_MD:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
-; IEEE-NEXT:    [[AFN_NO_MD:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_NO_MD]])
+; IEEE-NEXT:    [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
+; IEEE-NEXT:    [[AFN_NO_MD:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_NO_MD]])
 ; IEEE-NEXT:    store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_AFN_25ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
-; IEEE-NEXT:    [[AFN_25ULP:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_25ULP]])
+; IEEE-NEXT:    [[SQRT_X_AFN_25ULP:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
+; IEEE-NEXT:    [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_25ULP]])
 ; IEEE-NEXT:    store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
 ; IEEE-NEXT:    [[FAST_NO_MD:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[SQRT_X_FAST_NO_MD]])
@@ -846,46 +846,46 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
 ; IEEE-NEXT:    [[SQRT_X_FAST_25ULP:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]), !fpmath !0
 ; IEEE-NEXT:    [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[SQRT_X_FAST_25ULP]])
 ; IEEE-NEXT:    store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP]])
+; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[NEG_SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[NEG_FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.fdiv.fast(float -1.000000e+00, float [[NEG_SQRT_X_3ULP]])
+; IEEE-NEXT:    [[NEG_SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[NEG_FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float -1.000000e+00, float [[NEG_SQRT_X_3ULP]])
 ; IEEE-NEXT:    store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
-; IEEE-NEXT:    [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_HALF_ULP]])
+; IEEE-NEXT:    [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
+; IEEE-NEXT:    [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_HALF_ULP]])
 ; IEEE-NEXT:    store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
-; IEEE-NEXT:    [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_MISMATCH_MD1]])
+; IEEE-NEXT:    [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_MISMATCH_MD1]])
 ; IEEE-NEXT:    store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_SQRT_MISMATCH_MD2:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD2]])
+; IEEE-NEXT:    [[SQRT_MISMATCH_MD2:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_SQRT_MISMATCH_MD2:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD2]])
 ; IEEE-NEXT:    store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
-; DAZ-NEXT:    [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[SQRT_X_NO_MD]]
+; DAZ-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
+; DAZ-NEXT:    [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
 ; DAZ-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
-; DAZ-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP]])
+; DAZ-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; DAZ-NEXT:    [[MD_1ULP:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP]])
 ; DAZ-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; DAZ-NEXT:    [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
 ; DAZ-NEXT:    store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[MD_1ULP_MULTI_USE:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP_MULTI_USE]])
+; DAZ-NEXT:    [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP_MULTI_USE]])
 ; DAZ-NEXT:    store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
-; DAZ-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_25ULP]])
+; DAZ-NEXT:    [[SQRT_MD_25ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !0
+; DAZ-NEXT:    [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_25ULP]])
 ; DAZ-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
-; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
+; DAZ-NEXT:    [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
+; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
 ; DAZ-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_AFN_NO_MD:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
-; DAZ-NEXT:    [[AFN_NO_MD:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_NO_MD]])
+; DAZ-NEXT:    [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
+; DAZ-NEXT:    [[AFN_NO_MD:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_NO_MD]])
 ; DAZ-NEXT:    store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_AFN_25ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
-; DAZ-NEXT:    [[AFN_25ULP:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_25ULP]])
+; DAZ-NEXT:    [[SQRT_X_AFN_25ULP:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
+; DAZ-NEXT:    [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[SQRT_X_AFN_25ULP]])
 ; DAZ-NEXT:    store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
 ; DAZ-NEXT:    [[FAST_NO_MD:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[SQRT_X_FAST_NO_MD]])
@@ -893,52 +893,52 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
 ; DAZ-NEXT:    [[SQRT_X_FAST_25ULP:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]), !fpmath !0
 ; DAZ-NEXT:    [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[SQRT_X_FAST_25ULP]])
 ; DAZ-NEXT:    store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
+; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[NEG_SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[TMP1:%.*]] = fneg float [[NEG_SQRT_X_3ULP]]
-; DAZ-NEXT:    [[NEG_FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[NEG_SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[TMP1:%.*]] = fneg contract float [[NEG_SQRT_X_3ULP]]
+; DAZ-NEXT:    [[NEG_FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; DAZ-NEXT:    store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
-; DAZ-NEXT:    [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]])
+; DAZ-NEXT:    [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
+; DAZ-NEXT:    [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]])
 ; DAZ-NEXT:    store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
-; DAZ-NEXT:    [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD1]])
+; DAZ-NEXT:    [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD1]])
 ; DAZ-NEXT:    store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_SQRT_MISMATCH_MD2:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD2]])
+; DAZ-NEXT:    [[SQRT_MISMATCH_MD2:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_SQRT_MISMATCH_MD2:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD2]])
 ; DAZ-NEXT:    store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %sqrt.x.no.md = call float @llvm.sqrt.f32(float %x)
-  %no.md = fdiv float 1.000000e+00, %sqrt.x.no.md
+  %sqrt.x.no.md = call contract float @llvm.sqrt.f32(float %x)
+  %no.md = fdiv contract float 1.000000e+00, %sqrt.x.no.md
   store volatile float %no.md, ptr addrspace(1) %out, align 4
 
   ; Matches the rsq instruction accuracy
-  %sqrt.md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
-  %md.1ulp = fdiv float 1.000000e+00, %sqrt.md.1ulp, !fpmath !2
+  %sqrt.md.1ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
+  %md.1ulp = fdiv contract float 1.000000e+00, %sqrt.md.1ulp, !fpmath !2
   store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
 
-  %sqrt.md.1ulp.multi.use = call float @llvm.sqrt.f32(float %x), !fpmath !2
+  %sqrt.md.1ulp.multi.use = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
   store volatile float %sqrt.md.1ulp.multi.use, ptr addrspace(1) %out, align 4
-  %md.1ulp.multi.use = fdiv float 1.000000e+00, %sqrt.md.1ulp.multi.use, !fpmath !2
+  %md.1ulp.multi.use = fdiv contract float 1.000000e+00, %sqrt.md.1ulp.multi.use, !fpmath !2
   store volatile float %md.1ulp.multi.use, ptr addrspace(1) %out, align 4
 
-  %sqrt.md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
-  %md.25ulp = fdiv float 1.000000e+00, %sqrt.md.25ulp, !fpmath !0
+  %sqrt.md.25ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !0
+  %md.25ulp = fdiv contract float 1.000000e+00, %sqrt.md.25ulp, !fpmath !0
   store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
 
-  %sqrt.md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
-  %md.half.ulp = fdiv float 1.000000e+00, %sqrt.md.half.ulp, !fpmath !1
+  %sqrt.md.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
+  %md.half.ulp = fdiv contract float 1.000000e+00, %sqrt.md.half.ulp, !fpmath !1
   store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.afn.no.md = call afn float @llvm.sqrt.f32(float %x)
-  %afn.no.md = fdiv afn float 1.000000e+00, %sqrt.x.afn.no.md
+  %sqrt.x.afn.no.md = call contract afn float @llvm.sqrt.f32(float %x)
+  %afn.no.md = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.no.md
   store volatile float %afn.no.md, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.afn.25ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !0
-  %afn.25ulp = fdiv afn float 1.000000e+00, %sqrt.x.afn.25ulp, !fpmath !0
+  %sqrt.x.afn.25ulp = call contract afn float @llvm.sqrt.f32(float %x), !fpmath !0
+  %afn.25ulp = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.25ulp, !fpmath !0
   store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4
 
   %sqrt.x.fast.no.md = call fast float @llvm.sqrt.f32(float %x)
@@ -953,27 +953,27 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
   ; Test mismatched metadata/flags between the sqrt and fdiv
 
   ; Test the expected opencl default pattern
-  %sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %fdiv.opencl = fdiv float 1.0, %sqrt.x.3ulp, !fpmath !0
+  %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !0
   store volatile float %fdiv.opencl, ptr addrspace(1) %out, align 4
 
-  %neg.sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %neg.fdiv.opencl = fdiv float -1.0, %neg.sqrt.x.3ulp, !fpmath !0
+  %neg.sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %neg.fdiv.opencl = fdiv contract float -1.0, %neg.sqrt.x.3ulp, !fpmath !0
   store volatile float %neg.fdiv.opencl, ptr addrspace(1) %out, align 4
 
   ; sqrt demands higher precision than fdiv
-  %sqrt.x.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
-  %fdiv.sqrt.mismatch.md0 = fdiv float 1.0, %sqrt.x.half.ulp, !fpmath !0
+  %sqrt.x.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
+  %fdiv.sqrt.mismatch.md0 = fdiv contract float 1.0, %sqrt.x.half.ulp, !fpmath !0
   store volatile float %fdiv.sqrt.mismatch.md0, ptr addrspace(1) %out, align 4
 
   ; sqrt demands full precision but has afn
   %sqrt.mismatch.md1 = call afn float @llvm.sqrt.f32(float %x)
-  %fdiv.sqrt.mismatch.md1 = fdiv float 1.0, %sqrt.mismatch.md1, !fpmath !0
+  %fdiv.sqrt.mismatch.md1 = fdiv contract float 1.0, %sqrt.mismatch.md1, !fpmath !0
   store volatile float %fdiv.sqrt.mismatch.md1, ptr addrspace(1) %out, align 4
 
   ; sqrt has relaxed precision fdiv has afn only
-  %sqrt.mismatch.md2 = call float @llvm.sqrt.f32(float %x), !fpmath !3
-  %fdiv.sqrt.mismatch.md2 = fdiv afn float 1.0, %sqrt.mismatch.md2
+  %sqrt.mismatch.md2 = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
+  %fdiv.sqrt.mismatch.md2 = fdiv contract afn float 1.0, %sqrt.mismatch.md2
   store volatile float %fdiv.sqrt.mismatch.md2, ptr addrspace(1) %out, align 4
 
   ret void
@@ -982,132 +982,168 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
 define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x) {
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_3ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF_NNAN]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF_NNAN]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NINF:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NINF:%.*]] = call ninf float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NINF:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NINF:%.*]] = call ninf contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NNAN:%.*]] = call nnan float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NNAN:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NNAN]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NNAN:%.*]] = call nnan contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NNAN:%.*]] = call nnan contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NNAN]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NSZ:%.*]] = call nsz float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NSZ:%.*]] = call nsz float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NSZ]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NSZ:%.*]] = call nsz contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NSZ:%.*]] = call nsz contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NSZ]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NINF_MIX0:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF_MIX0]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NINF_MIX0:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF_MIX0]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NINF_MIX1:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NNAN_MIX1:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF_MIX1]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NINF_MIX1:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NNAN_MIX1:%.*]] = call nnan contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NINF_MIX1]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NNAN_MIX1]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_3ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF_NNAN]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF_NNAN]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NINF:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NINF:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NINF:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NINF:%.*]] = call ninf contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NNAN:%.*]] = call nnan float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NNAN:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NNAN]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NNAN:%.*]] = call nnan contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NNAN:%.*]] = call nnan contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NNAN]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NSZ:%.*]] = call nsz float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NSZ:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NSZ]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NSZ:%.*]] = call nsz contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NSZ:%.*]] = call nsz contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NSZ]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NINF_MIX0:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF_MIX0]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NINF_MIX0:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF_MIX0]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NINF_MIX1:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NNAN_MIX1:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF_MIX1]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NINF_MIX1:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NNAN_MIX1:%.*]] = call nnan contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NINF_MIX1]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NNAN_MIX1]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %sqrt.x.3ulp.ninf.nnan = call ninf nnan float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %fdiv.opencl.ninf.nnan = fdiv ninf nnan float 1.0, %sqrt.x.3ulp.ninf.nnan, !fpmath !0
+  %sqrt.x.3ulp.ninf.nnan = call contract ninf nnan float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %fdiv.opencl.ninf.nnan = fdiv contract ninf nnan float 1.0, %sqrt.x.3ulp.ninf.nnan, !fpmath !0
   store volatile float %fdiv.opencl.ninf.nnan, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.ninf = call ninf float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %fdiv.opencl.ninf = fdiv ninf float 1.0, %sqrt.x.3ulp.ninf, !fpmath !0
+  %sqrt.x.3ulp.ninf = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %fdiv.opencl.ninf = fdiv contract ninf float 1.0, %sqrt.x.3ulp.ninf, !fpmath !0
   store volatile float %fdiv.opencl.ninf, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.nnan = call nnan float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %fdiv.opencl.nnan = fdiv nnan float 1.0, %sqrt.x.3ulp.nnan, !fpmath !0
+  %sqrt.x.3ulp.nnan = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %fdiv.opencl.nnan = fdiv contract nnan float 1.0, %sqrt.x.3ulp.nnan, !fpmath !0
   store volatile float %fdiv.opencl.nnan, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.nsz = call nsz float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %fdiv.opencl.nsz = fdiv nsz float 1.0, %sqrt.x.3ulp.nsz, !fpmath !0
+  %sqrt.x.3ulp.nsz = call contract nsz float @llvm.sqrt.f32(float %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %fdiv.opencl.nsz = fdiv contract nsz float 1.0, %sqrt.x.3ulp.nsz, !fpmath !0
   store volatile float %fdiv.opencl.nsz, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.ninf.mix0 = call ninf float @llvm.sqrt.f32(float %x), !fpmath !3
-  %fdiv.opencl.nnan.mix0 = fdiv nnan float 1.0, %sqrt.x.3ulp.ninf.mix0, !fpmath !0
+  %sqrt.x.3ulp.ninf.mix0 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3
+  %fdiv.opencl.nnan.mix0 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix0, !fpmath !0
   store volatile float %fdiv.opencl.nnan.mix0, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.ninf.mix1 = call ninf float @llvm.sqrt.f32(float %x), !fpmath !3
-  %fdiv.opencl.nnan.mix1 = fdiv nnan float 1.0, %sqrt.x.3ulp.ninf.mix1, !fpmath !0
+  %sqrt.x.3ulp.ninf.mix1 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3
+  %fdiv.opencl.nnan.mix1 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix1, !fpmath !0
   store volatile float %fdiv.opencl.nnan.mix1, ptr addrspace(1) %out, align 4
 
   ret void
 }
 
+define float @rsq_f32_missing_contract0(float %x) {
+; IEEE-LABEL: define float @rsq_f32_missing_contract0
+; IEEE-SAME: (float [[X:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_3ULP]], !fpmath !2
+; IEEE-NEXT:    ret float [[FDIV_OPENCL]]
+;
+; DAZ-LABEL: define float @rsq_f32_missing_contract0
+; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
+; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
+;
+  %sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
+  %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !2
+  ret float %fdiv.opencl
+}
+
+define float @rsq_f32_missing_contract1(float %x) {
+; IEEE-LABEL: define float @rsq_f32_missing_contract1
+; IEEE-SAME: (float [[X:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = fdiv float 1.000000e+00, [[SQRT_X_3ULP]], !fpmath !2
+; IEEE-NEXT:    ret float [[FDIV_OPENCL]]
+;
+; DAZ-LABEL: define float @rsq_f32_missing_contract1
+; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
+; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
+;
+  %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
+  %fdiv.opencl = fdiv float 1.0, %sqrt.x.3ulp, !fpmath !2
+  ret float %fdiv.opencl
+}
+
 define float @rsq_f32_flag_merge(float %x) {
 ; IEEE-LABEL: define float @rsq_f32_flag_merge
 ; IEEE-SAME: (float [[X:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !2
-; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = fdiv nsz float 1.000000e+00, [[SQRT_X_3ULP]], !fpmath !2
+; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = fdiv nsz contract float 1.000000e+00, [[SQRT_X_3ULP]], !fpmath !2
 ; IEEE-NEXT:    ret float [[FDIV_OPENCL]]
 ;
 ; DAZ-LABEL: define float @rsq_f32_flag_merge
 ; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !2
-; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
+; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call ninf contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call nsz contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
 ; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
 ;
-  %sqrt.x.3ulp = call ninf float @llvm.sqrt.f32(float %x), !fpmath !2
-  %fdiv.opencl = fdiv nsz float 1.0, %sqrt.x.3ulp, !fpmath !2
+  %sqrt.x.3ulp = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !2
+  %fdiv.opencl = fdiv contract nsz float 1.0, %sqrt.x.3ulp, !fpmath !2
   ret float %fdiv.opencl
 }
 
 define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofpclass(nan) %no.nan,
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_NAN:%.*]] = call float @llvm.sqrt.f32(float [[NO_NAN]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_NAN:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_NAN]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_NAN:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_NAN]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_NAN:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_NAN]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_INF:%.*]] = call float @llvm.sqrt.f32(float [[NO_INF]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_INF:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_INF]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_INF:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_INF]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_INF:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_INF]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_INF_NAN:%.*]] = call float @llvm.sqrt.f32(float [[NO_INF_NAN]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_INF_NAN]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_INF_NAN:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_INF_NAN]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_INF_NAN]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_NAN:%.*]] = call float @llvm.sqrt.f32(float [[NO_NAN]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_NAN]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_NAN:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_NAN]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_NAN:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_NAN]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_INF:%.*]] = call float @llvm.sqrt.f32(float [[NO_INF]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_INF:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_INF]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_INF:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_INF]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_INF:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_INF]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_INF_NAN:%.*]] = call float @llvm.sqrt.f32(float [[NO_INF_NAN]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_INF_NAN]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_INF_NAN:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_INF_NAN]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_INF_NAN]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
   float nofpclass(nan) %no.inf,
   float nofpclass(inf nan) %no.inf.nan) {
-  %sqrt.x.3ulp.no.nan = call float @llvm.sqrt.f32(float %no.nan), !fpmath !3
-  %fdiv.opencl.no.nan = fdiv float 1.0, %sqrt.x.3ulp.no.nan, !fpmath !0
+  %sqrt.x.3ulp.no.nan = call contract float @llvm.sqrt.f32(float %no.nan), !fpmath !3
+  %fdiv.opencl.no.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.nan, !fpmath !0
   store volatile float %fdiv.opencl.no.nan, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.no.inf = call float @llvm.sqrt.f32(float %no.inf), !fpmath !3
-  %fdiv.opencl.no.inf = fdiv float 1.0, %sqrt.x.3ulp.no.inf, !fpmath !0
+  %sqrt.x.3ulp.no.inf = call contract float @llvm.sqrt.f32(float %no.inf), !fpmath !3
+  %fdiv.opencl.no.inf = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf, !fpmath !0
   store volatile float %fdiv.opencl.no.inf, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.no.inf.nan = call float @llvm.sqrt.f32(float %no.inf.nan), !fpmath !3
-  %fdiv.opencl.no.inf.nan = fdiv float 1.0, %sqrt.x.3ulp.no.inf.nan, !fpmath !0
+  %sqrt.x.3ulp.no.inf.nan = call contract float @llvm.sqrt.f32(float %no.inf.nan), !fpmath !3
+  %fdiv.opencl.no.inf.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf.nan, !fpmath !0
   store volatile float %fdiv.opencl.no.inf.nan, ptr addrspace(1) %out, align 4
 
   ret void
@@ -1116,30 +1152,30 @@ define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofp
 define amdgpu_kernel void @rsq_f32_known_nozero(ptr addrspace(1) %out, float nofpclass(zero) %no.zero, float nofpclass(zero sub) %no.zero.sub) {
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_ZERO:%.*]] = call float @llvm.sqrt.f32(float [[NO_ZERO]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_ZERO:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_ZERO]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_ZERO:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_ZERO]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_ZERO:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_ZERO]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_ZERO_SUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_ZERO_SUB]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_ZERO_SUB]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_ZERO_SUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_ZERO_SUB]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_ZERO_SUB]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_ZERO:%.*]] = call float @llvm.sqrt.f32(float [[NO_ZERO]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_ZERO:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_ZERO]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_ZERO:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_ZERO]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_ZERO:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_ZERO]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_ZERO_SUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_ZERO_SUB]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_ZERO_SUB]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_ZERO_SUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_ZERO_SUB]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_ZERO_SUB]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %sqrt.x.3ulp.no.zero = call float @llvm.sqrt.f32(float %no.zero), !fpmath !3
-  %fdiv.opencl.no.zero = fdiv float 1.0, %sqrt.x.3ulp.no.zero, !fpmath !0
+  %sqrt.x.3ulp.no.zero = call contract float @llvm.sqrt.f32(float %no.zero), !fpmath !3
+  %fdiv.opencl.no.zero = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero, !fpmath !0
   store volatile float %fdiv.opencl.no.zero, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.no.zero.sub = call float @llvm.sqrt.f32(float %no.zero.sub), !fpmath !3
-  %fdiv.opencl.no.zero.sub = fdiv float 1.0, %sqrt.x.3ulp.no.zero.sub, !fpmath !0
+  %sqrt.x.3ulp.no.zero.sub = call contract float @llvm.sqrt.f32(float %no.zero.sub), !fpmath !3
+  %fdiv.opencl.no.zero.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero.sub, !fpmath !0
   store volatile float %fdiv.opencl.no.zero.sub, ptr addrspace(1) %out, align 4
 
   ret void
@@ -1148,40 +1184,40 @@ define amdgpu_kernel void @rsq_f32_known_nozero(ptr addrspace(1) %out, float nof
 define amdgpu_kernel void @rsq_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %no.sub, float nofpclass(psub) %no.psub, float nofpclass(nsub) %no.nsub) {
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_SUB]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_SUB]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_SUB]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_SUB]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_PSUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_PSUB]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_PSUB:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_PSUB]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_PSUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_PSUB]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_PSUB:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_PSUB]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_NSUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_NSUB]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_NSUB:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_NSUB]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_NSUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_NSUB]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_NSUB:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_NSUB]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_SUB]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_SUB]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_SUB]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_SUB]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_PSUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_PSUB]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_PSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_PSUB]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_PSUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_PSUB]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_PSUB:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_PSUB]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_NSUB:%.*]] = call float @llvm.sqrt.f32(float [[NO_NSUB]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_NSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_NSUB]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_NSUB:%.*]] = call contract float @llvm.sqrt.f32(float [[NO_NSUB]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_NSUB:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_NSUB]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %sqrt.x.3ulp.no.sub = call float @llvm.sqrt.f32(float %no.sub), !fpmath !3
-  %fdiv.opencl.no.sub = fdiv float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
+  %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %no.sub), !fpmath !3
+  %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
   store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.no.psub = call float @llvm.sqrt.f32(float %no.psub), !fpmath !3
-  %fdiv.opencl.no.psub = fdiv float 1.0, %sqrt.x.3ulp.no.psub, !fpmath !0
+  %sqrt.x.3ulp.no.psub = call contract float @llvm.sqrt.f32(float %no.psub), !fpmath !3
+  %fdiv.opencl.no.psub = fdiv contract float 1.0, %sqrt.x.3ulp.no.psub, !fpmath !0
   store volatile float %fdiv.opencl.no.psub, ptr addrspace(1) %out, align 4
 
-  %sqrt.x.3ulp.no.nsub = call float @llvm.sqrt.f32(float %no.nsub), !fpmath !3
-  %fdiv.opencl.no.nsub = fdiv float 1.0, %sqrt.x.3ulp.no.nsub, !fpmath !0
+  %sqrt.x.3ulp.no.nsub = call contract float @llvm.sqrt.f32(float %no.nsub), !fpmath !3
+  %fdiv.opencl.no.nsub = fdiv contract float 1.0, %sqrt.x.3ulp.no.nsub, !fpmath !0
   store volatile float %fdiv.opencl.no.nsub, ptr addrspace(1) %out, align 4
 
   ret void
@@ -1193,8 +1229,8 @@ define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x)
 ; IEEE-NEXT:    [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
 ; IEEE-NEXT:    [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
 ; IEEE-NEXT:    call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
-; IEEE-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; IEEE-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_SUB]])
+; IEEE-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; IEEE-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[SQRT_X_3ULP_NO_SUB]])
 ; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
@@ -1203,16 +1239,16 @@ define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x)
 ; DAZ-NEXT:    [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
 ; DAZ-NEXT:    [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
 ; DAZ-NEXT:    call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
-; DAZ-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
-; DAZ-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_SUB]])
+; DAZ-NEXT:    [[SQRT_X_3ULP_NO_SUB:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
+; DAZ-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP_NO_SUB]])
 ; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
   %fabs.x = call float @llvm.fabs.f32(float %x)
   %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
   call void @llvm.assume(i1 %is.not.subnormal)
-  %sqrt.x.3ulp.no.sub = call float @llvm.sqrt.f32(float %x), !fpmath !3
-  %fdiv.opencl.no.sub = fdiv float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
+  %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
+  %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
   store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -1220,95 +1256,95 @@ define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x)
 define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT_X_NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
+; IEEE-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_X_NO_MD]], i64 0
-; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
 ; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[SQRT_X_NO_MD]], i64 1
-; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float 1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv contract float 1.000000e+00, [[TMP4]]
 ; IEEE-NEXT:    [[NO_MD:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP5]], i64 1
 ; IEEE-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MD_1ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
+; IEEE-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
 ; IEEE-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
-; IEEE-NEXT:    [[TMP7:%.*]] = fdiv float 1.000000e+00, [[TMP6]]
+; IEEE-NEXT:    [[TMP7:%.*]] = fdiv contract float 1.000000e+00, [[TMP6]]
 ; IEEE-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0
 ; IEEE-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
-; IEEE-NEXT:    [[TMP10:%.*]] = fdiv float 1.000000e+00, [[TMP9]]
+; IEEE-NEXT:    [[TMP10:%.*]] = fdiv contract float 1.000000e+00, [[TMP9]]
 ; IEEE-NEXT:    [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP10]], i64 1
 ; IEEE-NEXT:    store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_MD_1ULP_UNDEF:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
+; IEEE-NEXT:    [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
 ; IEEE-NEXT:    [[TMP11:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
-; IEEE-NEXT:    [[TMP12:%.*]] = fdiv float 1.000000e+00, [[TMP11]]
+; IEEE-NEXT:    [[TMP12:%.*]] = fdiv contract float 1.000000e+00, [[TMP11]]
 ; IEEE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0
 ; IEEE-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
-; IEEE-NEXT:    [[TMP15:%.*]] = fdiv float undef, [[TMP14]]
+; IEEE-NEXT:    [[TMP15:%.*]] = fdiv contract float undef, [[TMP14]]
 ; IEEE-NEXT:    [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP15]], i64 1
 ; IEEE-NEXT:    store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
-; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
+; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
 ; IEEE-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
-; IEEE-NEXT:    [[TMP17:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[TMP16]])
+; IEEE-NEXT:    [[TMP17:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[TMP16]])
 ; IEEE-NEXT:    [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP17]], i64 0
 ; IEEE-NEXT:    [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
-; IEEE-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[TMP19]])
+; IEEE-NEXT:    [[TMP20:%.*]] = call contract float @llvm.amdgcn.fdiv.fast(float 1.000000e+00, float [[TMP19]])
 ; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP20]], i64 1
 ; IEEE-NEXT:    store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT_X_NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
+; DAZ-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_X_NO_MD]], i64 0
-; DAZ-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; DAZ-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
 ; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[SQRT_X_NO_MD]], i64 1
-; DAZ-NEXT:    [[TMP5:%.*]] = fdiv float 1.000000e+00, [[TMP4]]
+; DAZ-NEXT:    [[TMP5:%.*]] = fdiv contract float 1.000000e+00, [[TMP4]]
 ; DAZ-NEXT:    [[NO_MD:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP5]], i64 1
 ; DAZ-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MD_1ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
+; DAZ-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
 ; DAZ-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
-; DAZ-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
+; DAZ-NEXT:    [[TMP7:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
 ; DAZ-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0
 ; DAZ-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
-; DAZ-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP9]])
+; DAZ-NEXT:    [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; DAZ-NEXT:    [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP10]], i64 1
 ; DAZ-NEXT:    store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_MD_1ULP_UNDEF:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
+; DAZ-NEXT:    [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
 ; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
-; DAZ-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
+; DAZ-NEXT:    [[TMP12:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP11]])
 ; DAZ-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0
 ; DAZ-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
-; DAZ-NEXT:    [[TMP15:%.*]] = fdiv float undef, [[TMP14]]
+; DAZ-NEXT:    [[TMP15:%.*]] = fdiv contract float undef, [[TMP14]]
 ; DAZ-NEXT:    [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP15]], i64 1
 ; DAZ-NEXT:    store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
-; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
+; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
 ; DAZ-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
-; DAZ-NEXT:    [[TMP17:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
+; DAZ-NEXT:    [[TMP17:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP16]])
 ; DAZ-NEXT:    [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP17]], i64 0
 ; DAZ-NEXT:    [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
-; DAZ-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]])
+; DAZ-NEXT:    [[TMP20:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
 ; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP20]], i64 1
 ; DAZ-NEXT:    store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %sqrt.x.no.md = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
-  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %sqrt.x.no.md
+  %sqrt.x.no.md = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
+  %no.md = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.no.md
   store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4
 
   ; Matches the rsq instruction accuracy
-  %sqrt.md.1ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
-  %md.1ulp = fdiv <2 x float> <float 1.0, float 1.0>, %sqrt.md.1ulp, !fpmath !2
+  %sqrt.md.1ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
+  %md.1ulp = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.md.1ulp, !fpmath !2
   store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4
 
   ; Matches the rsq instruction accuracy
-  %sqrt.md.1ulp.undef = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
-  %md.1ulp.undef = fdiv <2 x float> <float 1.0, float undef>, %sqrt.md.1ulp.undef, !fpmath !2
+  %sqrt.md.1ulp.undef = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
+  %md.1ulp.undef = fdiv contract <2 x float> <float 1.0, float undef>, %sqrt.md.1ulp.undef, !fpmath !2
   store volatile <2 x float> %md.1ulp.undef, ptr addrspace(1) %out, align 4
 
   ; Test mismatched metadata/flags between the sqrt and fdiv
 
   ; Test the expected opencl default pattern
-  %sqrt.x.3ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
-  %fdiv.opencl = fdiv <2 x float> <float 1.0, float 1.0>, %sqrt.x.3ulp, !fpmath !0
+  %sqrt.x.3ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3  ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
+  %fdiv.opencl = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.3ulp, !fpmath !0
   store volatile <2 x float> %fdiv.opencl, ptr addrspace(1) %out, align 4
 
   ret void
@@ -1481,25 +1517,25 @@ define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(ptr addrspace(1)
 define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %sqr.denom) {
 ; IEEE-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
-; IEEE-NEXT:    [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]], !fpmath !0
-; IEEE-NEXT:    [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]], !fpmath !0
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
+; IEEE-NEXT:    [[ARCP0:%.*]] = fdiv arcp contract float [[X]], [[DENOM]], !fpmath !0
+; IEEE-NEXT:    [[ARCP1:%.*]] = fdiv arcp contract float [[Y]], [[DENOM]], !fpmath !0
 ; IEEE-NEXT:    store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
-; DAZ-NEXT:    [[ARCP0:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[X]], float [[DENOM]])
-; DAZ-NEXT:    [[ARCP1:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[Y]], float [[DENOM]])
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
+; DAZ-NEXT:    [[ARCP0:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[X]], float [[DENOM]])
+; DAZ-NEXT:    [[ARCP1:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[Y]], float [[DENOM]])
 ; DAZ-NEXT:    store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %denom = call float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
-  %arcp0 = fdiv arcp float %x, %denom, !fpmath !0
-  %arcp1 = fdiv arcp float %y, %denom, !fpmath !0
+  %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
+  %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0
+  %arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0
   store volatile float %arcp0, ptr addrspace(1) %out
   store volatile float %arcp1, ptr addrspace(1) %out
   ret void
@@ -1508,22 +1544,22 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(ptr addrspace(1)
 define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %sqr.denom) {
 ; IEEE-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
 ; IEEE-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
-; IEEE-NEXT:    [[TMP3:%.*]] = fdiv arcp float [[TMP1]], [[TMP2]]
+; IEEE-NEXT:    [[TMP3:%.*]] = fdiv arcp contract float [[TMP1]], [[TMP2]]
 ; IEEE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
 ; IEEE-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[X]], i64 1
 ; IEEE-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
-; IEEE-NEXT:    [[TMP7:%.*]] = fdiv arcp float [[TMP5]], [[TMP6]]
+; IEEE-NEXT:    [[TMP7:%.*]] = fdiv arcp contract float [[TMP5]], [[TMP6]]
 ; IEEE-NEXT:    [[ARCP0:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP7]], i64 1
 ; IEEE-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[Y]], i64 0
 ; IEEE-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
-; IEEE-NEXT:    [[TMP10:%.*]] = fdiv arcp float [[TMP8]], [[TMP9]]
+; IEEE-NEXT:    [[TMP10:%.*]] = fdiv arcp contract float [[TMP8]], [[TMP9]]
 ; IEEE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
 ; IEEE-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[Y]], i64 1
 ; IEEE-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
-; IEEE-NEXT:    [[TMP14:%.*]] = fdiv arcp float [[TMP12]], [[TMP13]]
+; IEEE-NEXT:    [[TMP14:%.*]] = fdiv arcp contract float [[TMP12]], [[TMP13]]
 ; IEEE-NEXT:    [[ARCP1:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP14]], i64 1
 ; IEEE-NEXT:    store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
 ; IEEE-NEXT:    store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
@@ -1531,30 +1567,30 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrsp
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
 ; DAZ-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
-; DAZ-NEXT:    [[TMP3:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[TMP1]], float [[TMP2]])
+; DAZ-NEXT:    [[TMP3:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[TMP1]], float [[TMP2]])
 ; DAZ-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
 ; DAZ-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[X]], i64 1
 ; DAZ-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
-; DAZ-NEXT:    [[TMP7:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[TMP5]], float [[TMP6]])
+; DAZ-NEXT:    [[TMP7:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[TMP5]], float [[TMP6]])
 ; DAZ-NEXT:    [[ARCP0:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP7]], i64 1
 ; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[Y]], i64 0
 ; DAZ-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
-; DAZ-NEXT:    [[TMP10:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[TMP8]], float [[TMP9]])
+; DAZ-NEXT:    [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[TMP8]], float [[TMP9]])
 ; DAZ-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
 ; DAZ-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[Y]], i64 1
 ; DAZ-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
-; DAZ-NEXT:    [[TMP14:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[TMP12]], float [[TMP13]])
+; DAZ-NEXT:    [[TMP14:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[TMP12]], float [[TMP13]])
 ; DAZ-NEXT:    [[ARCP1:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP14]], i64 1
 ; DAZ-NEXT:    store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
 ; DAZ-NEXT:    store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
 ; DAZ-NEXT:    ret void
 ;
-  %denom = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %sqr.denom), !fpmath !3
-  %arcp0 = fdiv arcp <2 x float> %x, %denom, !fpmath !0
-  %arcp1 = fdiv arcp <2 x float> %y, %denom, !fpmath !0
+  %denom = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %sqr.denom), !fpmath !3
+  %arcp0 = fdiv contract arcp <2 x float> %x, %denom, !fpmath !0
+  %arcp1 = fdiv contract arcp <2 x float> %y, %denom, !fpmath !0
   store volatile <2 x float> %arcp0, ptr addrspace(1) %out
   store volatile <2 x float> %arcp1, ptr addrspace(1) %out
   ret void
@@ -1563,10 +1599,10 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrsp
 define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %sqr.denom) {
 ; IEEE-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
-; IEEE-NEXT:    [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]], !fpmath !0
-; IEEE-NEXT:    [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]], !fpmath !0
-; IEEE-NEXT:    [[ARCP2:%.*]] = fdiv arcp float [[Z]], [[DENOM]], !fpmath !0
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
+; IEEE-NEXT:    [[ARCP0:%.*]] = fdiv arcp contract float [[X]], [[DENOM]], !fpmath !0
+; IEEE-NEXT:    [[ARCP1:%.*]] = fdiv arcp contract float [[Y]], [[DENOM]], !fpmath !0
+; IEEE-NEXT:    [[ARCP2:%.*]] = fdiv arcp contract float [[Z]], [[DENOM]], !fpmath !0
 ; IEEE-NEXT:    store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
@@ -1574,19 +1610,19 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
-; DAZ-NEXT:    [[ARCP0:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[X]], float [[DENOM]])
-; DAZ-NEXT:    [[ARCP1:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[Y]], float [[DENOM]])
-; DAZ-NEXT:    [[ARCP2:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[Z]], float [[DENOM]])
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
+; DAZ-NEXT:    [[ARCP0:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[X]], float [[DENOM]])
+; DAZ-NEXT:    [[ARCP1:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[Y]], float [[DENOM]])
+; DAZ-NEXT:    [[ARCP2:%.*]] = call arcp contract float @llvm.amdgcn.fdiv.fast(float [[Z]], float [[DENOM]])
 ; DAZ-NEXT:    store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
-  %denom = call float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
-  %arcp0 = fdiv arcp float %x, %denom, !fpmath !0
-  %arcp1 = fdiv arcp float %y, %denom, !fpmath !0
-  %arcp2 = fdiv arcp float %z, %denom, !fpmath !0
+  %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
+  %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0
+  %arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0
+  %arcp2 = fdiv contract arcp float %z, %denom, !fpmath !0
   store volatile float %arcp0, ptr addrspace(1) %out
   store volatile float %arcp1, ptr addrspace(1) %out
   store volatile float %arcp2, ptr addrspace(1) %out
@@ -1596,236 +1632,236 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) {
 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
 ; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv contract float -1.000000e+00, [[TMP4]]
 ; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv contract float 4.000000e+00, [[TMP7]]
 ; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv contract float undef, [[TMP10]]
 ; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
 ; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP2:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
-; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg contract float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
 ; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv contract float 4.000000e+00, [[TMP8]]
 ; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
 ; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float undef, [[TMP11]]
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv contract float undef, [[TMP11]]
 ; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
 ; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
-  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
   ret <4 x float> %partial.rsq
 }
 
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> %arg) {
 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
 ; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv contract float -1.000000e+00, [[TMP4]]
 ; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv contract float 4.000000e+00, [[TMP7]]
 ; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv contract float undef, [[TMP10]]
 ; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
 ; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP2:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
-; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg contract float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
 ; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv contract float 4.000000e+00, [[TMP8]]
 ; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
 ; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float undef, [[TMP11]]
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv contract float undef, [[TMP11]]
 ; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
 ; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
-  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  %denom = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
+  %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
   ret <4 x float> %partial.rsq
 }
 
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; CHECK-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fneg afn float [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; CHECK-NEXT:    [[TMP5:%.*]] = fneg contract afn float [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; CHECK-NEXT:    [[TMP9:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP8]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fmul afn float 4.000000e+00, [[TMP9]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[TMP8]])
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul contract afn float 4.000000e+00, [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP10]], i64 2
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; CHECK-NEXT:    [[TMP13:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP12]])
-; CHECK-NEXT:    [[TMP14:%.*]] = fmul afn float undef, [[TMP13]]
+; CHECK-NEXT:    [[TMP13:%.*]] = call contract afn float @llvm.amdgcn.rcp.f32(float [[TMP12]])
+; CHECK-NEXT:    [[TMP14:%.*]] = fmul contract afn float undef, [[TMP13]]
 ; CHECK-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP14]], i64 3
 ; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
-  %partial.rsq = fdiv afn <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
+  %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv contract afn <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
   ret <4 x float> %partial.rsq
 }
 
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; CHECK-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv contract float -1.000000e+00, [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; CHECK-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv contract float 4.000000e+00, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; CHECK-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv contract float undef, [[TMP10]]
 ; CHECK-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
-  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
+  %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
   ret <4 x float> %partial.rsq
 }
 
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x float> %arg) {
 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
 ; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv contract float -1.000000e+00, [[TMP4]]
 ; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv contract float 4.000000e+00, [[TMP7]]
 ; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv contract float undef, [[TMP10]]
 ; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
 ; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP2:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
-; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg contract float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
 ; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv contract float 4.000000e+00, [[TMP8]]
 ; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
 ; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float undef, [[TMP11]]
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv contract float undef, [[TMP11]]
 ; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
 ; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
-  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
+  %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
   ret <4 x float> %partial.rsq
 }
 
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
 ; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; IEEE-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; IEEE-NEXT:    [[TMP2:%.*]] = fdiv arcp float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv arcp contract float 1.000000e+00, [[TMP1]]
 ; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; IEEE-NEXT:    [[TMP5:%.*]] = fdiv arcp float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv arcp contract float -1.000000e+00, [[TMP4]]
 ; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; IEEE-NEXT:    [[TMP8:%.*]] = fdiv arcp float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv arcp contract float 4.000000e+00, [[TMP7]]
 ; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; IEEE-NEXT:    [[TMP11:%.*]] = fdiv arcp float undef, [[TMP10]]
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv arcp contract float undef, [[TMP10]]
 ; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
 ; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; DAZ-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; DAZ-NEXT:    [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; DAZ-NEXT:    [[TMP5:%.*]] = fneg arcp float [[TMP4]]
-; DAZ-NEXT:    [[TMP6:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg arcp contract float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
 ; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; DAZ-NEXT:    [[TMP9:%.*]] = fdiv arcp float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv arcp contract float 4.000000e+00, [[TMP8]]
 ; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
 ; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; DAZ-NEXT:    [[TMP12:%.*]] = fdiv arcp float undef, [[TMP11]]
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv arcp contract float undef, [[TMP11]]
 ; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
 ; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
-  %partial.rsq = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
   ret <4 x float> %partial.rsq
 }
 
 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; CHECK-NEXT:    [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = fdiv arcp float 1.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv arcp contract float 1.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv arcp float -1.000000e+00, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv arcp contract float -1.000000e+00, [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
-; CHECK-NEXT:    [[TMP8:%.*]] = fdiv arcp float 4.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv arcp contract float 4.000000e+00, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
-; CHECK-NEXT:    [[TMP11:%.*]] = fdiv arcp float undef, [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv arcp contract float undef, [[TMP10]]
 ; CHECK-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
-  %partial.rsq = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
+  %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
   ret <4 x float> %partial.rsq
 }
 
@@ -1892,41 +1928,41 @@ define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(<4 x fl
 define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float> %x) {
 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_const_denom
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
-; IEEE-NEXT:    [[SQRT:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
+; IEEE-NEXT:    [[SQRT:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
 ; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
-; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv contract float 1.000000e+00, [[TMP1]]
 ; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
-; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv contract float -1.000000e+00, [[TMP4]]
 ; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
 ; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
-; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float undef, [[TMP7]]
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv contract float undef, [[TMP7]]
 ; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
 ; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
-; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float 2.000000e+00, [[TMP10]]
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv contract float 2.000000e+00, [[TMP10]]
 ; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
 ; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_const_denom
 ; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
-; DAZ-NEXT:    [[SQRT:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
+; DAZ-NEXT:    [[SQRT:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
-; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP2:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
-; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
-; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg contract float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
 ; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
-; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float undef, [[TMP8]]
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv contract float undef, [[TMP8]]
 ; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
 ; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
-; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float 2.000000e+00, [[TMP11]]
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv contract float 2.000000e+00, [[TMP11]]
 ; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
 ; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
 ;
-  %sqrt = call <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float undef>), !fpmath !2
-  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, %sqrt, !fpmath !2
+  %sqrt = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float undef>), !fpmath !2
+  %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, %sqrt, !fpmath !2
   ret <4 x float> %partial.rsq
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
index cbe5102cddb899..4324d6911ca9d3 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
@@ -96,8 +96,8 @@ define amdgpu_kernel void @rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(
 ; GCN-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GCN-UNSAFE-NEXT:    s_endpgm
   %val = load float, ptr addrspace(1) %in, align 4
-  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
-  %div = fdiv float 1.0, %sqrt, !fpmath !0
+  %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !0
   store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -152,8 +152,8 @@ define amdgpu_kernel void @rsq_f32_sgpr(ptr addrspace(1) noalias %out, float %va
 ; GCN-UNSAFE-NEXT:    s_mov_b32 s2, -1
 ; GCN-UNSAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-UNSAFE-NEXT:    s_endpgm
-  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
-  %div = fdiv float 1.0, %sqrt, !fpmath !0
+  %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !0
   store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -311,9 +311,9 @@ define amdgpu_kernel void @rsqrt_fmul(ptr addrspace(1) %out, ptr addrspace(1) %i
   %b = load volatile float, ptr addrspace(1) %gep.1
   %c = load volatile float, ptr addrspace(1) %gep.2
 
-  %x = call float @llvm.sqrt.f32(float %a)
-  %y = fmul float %x, %b
-  %z = fdiv float %c, %y
+  %x = call contract float @llvm.sqrt.f32(float %a)
+  %y = fmul contract float %x, %b
+  %z = fdiv contract float %c, %y
   store float %z, ptr addrspace(1) %out.gep
   ret void
 }
@@ -400,8 +400,8 @@ define amdgpu_kernel void @neg_rsq_f32(ptr addrspace(1) noalias %out, ptr addrsp
 ; GCN-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GCN-UNSAFE-NEXT:    s_endpgm
   %val = load float, ptr addrspace(1) %in, align 4
-  %sqrt = call float @llvm.sqrt.f32(float %val)
-  %div = fdiv float -1.0, %sqrt, !fpmath !0
+  %sqrt = call contract float @llvm.sqrt.f32(float %val)
+  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
   store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -489,8 +489,8 @@ define amdgpu_kernel void @neg_rsq_neg_f32(ptr addrspace(1) noalias %out, ptr ad
 ; GCN-UNSAFE-NEXT:    s_endpgm
   %val = load float, ptr addrspace(1) %in, align 4
   %val.fneg = fneg float %val
-  %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
-  %div = fdiv float -1.0, %sqrt, !fpmath !0
+  %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
+  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
   store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -523,8 +523,8 @@ define float @v_neg_rsq_neg_f32(float %val) {
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v1, v0
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
   %val.fneg = fneg float %val
-  %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
-  %div = fdiv float -1.0, %sqrt, !fpmath !0
+  %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
+  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
   ret float %div
 }
 
@@ -566,8 +566,8 @@ define <2 x float> @v_neg_rsq_neg_v2f32(<2 x float> %val) {
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v2, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
   %val.fneg = fneg <2 x float> %val
-  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %val.fneg)
-  %div = fdiv <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val.fneg)
+  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
   ret <2 x float> %div
 }
 
@@ -602,9 +602,9 @@ define float @v_neg_rsq_neg_f32_foldable_user(float %val0, float %val1) {
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
   %val0.neg = fneg float %val0
-  %sqrt = call float @llvm.sqrt.f32(float %val0.neg)
-  %div = fdiv float -1.0, %sqrt, !fpmath !0
-  %user = fmul float %div, %val1
+  %sqrt = call contract float @llvm.sqrt.f32(float %val0.neg)
+  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+  %user = fmul contract float %div, %val1
   ret float %user
 }
 
@@ -652,9 +652,9 @@ define <2 x float> @v_neg_rsq_neg_v2f32_foldable_user(<2 x float> %val0, <2 x fl
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
   %val0.fneg = fneg <2 x float> %val0
-  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0.fneg)
-  %div = fdiv <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
-  %user = fmul <2 x float> %div, %val1
+  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0.fneg)
+  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+  %user = fmul contract <2 x float> %div, %val1
   ret <2 x float> %user
 }
 
@@ -685,8 +685,8 @@ define float @v_neg_rsq_f32(float %val) {
 ; GCN-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v1, v0
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call float @llvm.sqrt.f32(float %val)
-  %div = fdiv float -1.0, %sqrt, !fpmath !0
+  %sqrt = call contract float @llvm.sqrt.f32(float %val)
+  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
   ret float %div
 }
 
@@ -727,8 +727,8 @@ define <2 x float> @v_neg_rsq_v2f32(<2 x float> %val) {
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v3, v0
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v2, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
-  %div = fdiv <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
+  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
   ret <2 x float> %div
 }
 
@@ -762,9 +762,9 @@ define float @v_neg_rsq_f32_foldable_user(float %val0, float %val1) {
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v2, v0
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call float @llvm.sqrt.f32(float %val0)
-  %div = fdiv float -1.0, %sqrt, !fpmath !0
-  %user = fmul float %div, %val1
+  %sqrt = call contract float @llvm.sqrt.f32(float %val0)
+  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+  %user = fmul contract float %div, %val1
   ret float %user
 }
 
@@ -811,9 +811,9 @@ define <2 x float> @v_neg_rsq_v2f32_foldable_user(<2 x float> %val0, <2 x float>
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0)
-  %div = fdiv <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
-  %user = fmul <2 x float> %div, %val1
+  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0)
+  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+  %user = fmul contract <2 x float> %div, %val1
   ret <2 x float> %user
 }
 
@@ -845,8 +845,76 @@ define float @v_rsq_f32(float %val) {
 ; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v1, v4, v3
 ; GCN-IEEE-SAFE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
 ; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+  ret float %div
+}
+
+define float @v_rsq_f32_missing_contract0(float %val) {
+; GCN-DAZ-LABEL: v_rsq_f32_missing_contract0:
+; GCN-DAZ:       ; %bb.0:
+; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_missing_contract0:
+; GCN-IEEE-UNSAFE:       ; %bb.0:
+; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract0:
+; GCN-IEEE-SAFE:       ; %bb.0:
+; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v1
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v2, v4, v2, v2
+; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, v5, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call float @llvm.sqrt.f32(float %val), !fpmath !1
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+  ret float %div
+}
+
+define float @v_rsq_f32_missing_contract1(float %val) {
+; GCN-DAZ-LABEL: v_rsq_f32_missing_contract1:
+; GCN-DAZ:       ; %bb.0:
+; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_missing_contract1:
+; GCN-IEEE-UNSAFE:       ; %bb.0:
+; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract1:
+; GCN-IEEE-SAFE:       ; %bb.0:
+; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v1
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v2, v4, v2, v2
+; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, v5, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
   %div = fdiv float 1.0, %sqrt, !fpmath !1
   ret float %div
 }
@@ -885,7 +953,7 @@ define float @v_rsq_f32_contractable_user(float %val0, float %val1) {
 ; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
 ; GCN-IEEE-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call float @llvm.sqrt.f32(float %val0), !fpmath !1
+  %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
   %div = fdiv contract float 1.0, %sqrt, !fpmath !1
   %add = fadd contract float %div, %val1
   ret float %add
@@ -924,8 +992,8 @@ define float @v_rsq_f32_contractable_user_missing_contract0(float %val0, float %
 ; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
 ; GCN-IEEE-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call float @llvm.sqrt.f32(float %val0), !fpmath !1
-  %div = fdiv float 1.0, %sqrt, !fpmath !1
+  %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
   %add = fadd contract float %div, %val1
   ret float %add
 }
@@ -963,12 +1031,80 @@ define float @v_rsq_f32_contractable_user_missing_contract1(float %val0, float %
 ; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
 ; GCN-IEEE-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
-  %sqrt = call float @llvm.sqrt.f32(float %val0), !fpmath !1
+  %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
   %div = fdiv contract float 1.0, %sqrt, !fpmath !1
   %add = fadd float %div, %val1
   ret float %add
 }
 
+define float @v_rsq_f32_known_never_denormal(float nofpclass(sub) %val) {
+; GCN-DAZ-LABEL: v_rsq_f32_known_never_denormal:
+; GCN-DAZ:       ; %bb.0:
+; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_known_never_denormal:
+; GCN-IEEE-UNSAFE:       ; %bb.0:
+; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-SAFE-LABEL: v_rsq_f32_known_never_denormal:
+; GCN-IEEE-SAFE:       ; %bb.0:
+; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v1
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v2, v4, v2, v2
+; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, v5, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+  ret float %div
+}
+
+define float @v_rsq_f32_known_never_posdenormal(float nofpclass(psub) %val) {
+; GCN-DAZ-LABEL: v_rsq_f32_known_never_posdenormal:
+; GCN-DAZ:       ; %bb.0:
+; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_known_never_posdenormal:
+; GCN-IEEE-UNSAFE:       ; %bb.0:
+; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
+; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-IEEE-SAFE-LABEL: v_rsq_f32_known_never_posdenormal:
+; GCN-IEEE-SAFE:       ; %bb.0:
+; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v1
+; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v2, v4, v2, v2
+; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, v3, v2
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v4, v5, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v1, v4, v3
+; GCN-IEEE-SAFE-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
+; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
+; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
+  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+  ret float %div
+}
+
 !0 = !{float 2.500000e+00}
 !1 = !{float 1.000000e+00}