[llvm] cdfdfe7 - AMDGPU: Add some additional rcp/rsq tests

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 18 03:37:20 PDT 2023


Author: Matt Arsenault
Date: 2023-07-18T06:37:15-04:00
New Revision: cdfdfe7ccc377e64dfa4975f709418a90b74271d

URL: https://github.com/llvm/llvm-project/commit/cdfdfe7ccc377e64dfa4975f709418a90b74271d
DIFF: https://github.com/llvm/llvm-project/commit/cdfdfe7ccc377e64dfa4975f709418a90b74271d.diff

LOG: AMDGPU: Add some additional rcp/rsq tests

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index bb0592465e5ae4..47b76d8981610c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s
 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-BADFREXP %s
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s
 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s
 
 ; Make sure this doesn't crash with no triple
@@ -40,6 +40,12 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa
 ; IEEE-NEXT:    [[TMP2:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[B]])
 ; IEEE-NEXT:    [[AFN_MD_25ULP:%.*]] = fmul afn float [[A]], [[TMP2]]
 ; IEEE-NEXT:    store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
+; IEEE-NEXT:    [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
+; IEEE-NEXT:    store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
+; IEEE-NEXT:    [[ARCP_MD_25ULP:%.*]] = fdiv arcp float [[A]], [[B]], !fpmath !0
+; IEEE-NEXT:    store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
+; IEEE-NEXT:    [[ARCP_MD_1ULP:%.*]] = fdiv arcp float [[A]], [[B]], !fpmath !2
+; IEEE-NEXT:    store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-NEXT:    ret void
 ;
 ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32
@@ -60,6 +66,12 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa
 ; DAZ-NEXT:    [[TMP2:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[B]])
 ; DAZ-NEXT:    [[AFN_MD_25ULP:%.*]] = fmul afn float [[A]], [[TMP2]]
 ; DAZ-NEXT:    store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
+; DAZ-NEXT:    [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
+; DAZ-NEXT:    store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
+; DAZ-NEXT:    [[ARCP_MD_25ULP:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
+; DAZ-NEXT:    store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
+; DAZ-NEXT:    [[ARCP_MD_1ULP:%.*]] = fdiv arcp float [[A]], [[B]], !fpmath !2
+; DAZ-NEXT:    store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    ret void
 ;
   %no.md = fdiv float %a, %b
@@ -76,6 +88,12 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa
   store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
   %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
   store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
+  %no.md.arcp = fdiv arcp float %a, %b
+  store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
+  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
+  store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
+  %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
+  store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -762,6 +780,41 @@ define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(ptr addrs
   ret void
 }
 
+define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) {
+; IEEE-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp
+; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[Y]], i64 0
+; IEEE-NEXT:    [[TMP3:%.*]] = fdiv arcp float [[TMP1]], [[TMP2]]
+; IEEE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
+; IEEE-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
+; IEEE-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[Y]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = fdiv arcp float [[TMP5]], [[TMP6]]
+; IEEE-NEXT:    [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP7]], i64 1
+; IEEE-NEXT:    store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
+; IEEE-NEXT:    ret void
+;
+; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp
+; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[Y]], i64 0
+; DAZ-NEXT:    [[TMP3:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[TMP1]], float [[TMP2]])
+; DAZ-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
+; DAZ-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
+; DAZ-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[Y]], i64 1
+; DAZ-NEXT:    [[TMP7:%.*]] = call arcp float @llvm.amdgcn.fdiv.fast(float [[TMP5]], float [[TMP6]])
+; DAZ-NEXT:    [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP7]], i64 1
+; DAZ-NEXT:    store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
+; DAZ-NEXT:    ret void
+;
+  %x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0
+  %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
+  store volatile <2 x float> %arcp.25ulp, ptr addrspace(1) %out, align 8
+  ret void
+}
+
 define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
@@ -998,6 +1051,24 @@ define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x)
   ret void
 }
 
+define float @rsq_f32_flag_merge(float %x) {
+; IEEE-LABEL: define float @rsq_f32_flag_merge
+; IEEE-SAME: (float [[X:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[SQRT_X_3ULP:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = fdiv nsz float 1.000000e+00, [[SQRT_X_3ULP]], !fpmath !2
+; IEEE-NEXT:    ret float [[FDIV_OPENCL]]
+;
+; DAZ-LABEL: define float @rsq_f32_flag_merge
+; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call ninf float @llvm.sqrt.f32(float [[X]]), !fpmath !2
+; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
+; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
+;
+  %sqrt.x.3ulp = call ninf float @llvm.sqrt.f32(float %x), !fpmath !2
+  %fdiv.opencl = fdiv nsz float 1.0, %sqrt.x.3ulp, !fpmath !2
+  ret float %fdiv.opencl
+}
+
 define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofpclass(nan) %no.nan,
 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite
 ; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
@@ -1522,9 +1593,366 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(
   ret void
 }
 
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) {
+; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
+; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
+; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
+; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float undef, [[TMP11]]
+; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
+; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> %arg) {
+; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
+; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[DENOM:%.*]] = call afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
+; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[DENOM:%.*]] = call afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
+; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float undef, [[TMP11]]
+; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
+; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
+  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fneg afn float [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP8]])
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul afn float 4.000000e+00, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP10]], i64 2
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; CHECK-NEXT:    [[TMP13:%.*]] = call afn float @llvm.amdgcn.rcp.f32(float [[TMP12]])
+; CHECK-NEXT:    [[TMP14:%.*]] = fmul afn float undef, [[TMP13]]
+; CHECK-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP14]], i64 3
+; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv afn <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; CHECK-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x float> %arg) {
+; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
+; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float undef, [[TMP10]]
+; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
+; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
+; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float undef, [[TMP11]]
+; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
+; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
+  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
+; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
+; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv arcp float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv arcp float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv arcp float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv arcp float undef, [[TMP10]]
+; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
+; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg arcp float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv arcp float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
+; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv arcp float undef, [[TMP11]]
+; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
+; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[DENOM:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv arcp float 1.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv arcp float -1.000000e+00, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv arcp float 4.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv arcp float undef, [[TMP10]]
+; CHECK-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %denom = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
+  %partial.rsq = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
+; IEEE-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp
+; IEEE-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv arcp float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 1
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv arcp float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv arcp float 4.000000e+00, [[TMP7]]
+; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[ARG]], i64 3
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv arcp float undef, [[TMP10]]
+; IEEE-NEXT:    [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RCP]]
+;
+; DAZ-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp
+; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg arcp float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 2
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv arcp float 4.000000e+00, [[TMP8]]
+; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
+; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[ARG]], i64 3
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv arcp float undef, [[TMP11]]
+; DAZ-NEXT:    [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
+; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RCP]]
+;
+  %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %arg, !fpmath !2
+  ret <4 x float> %partial.rcp
+}
+
+define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv arcp float 1.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv arcp float -1.000000e+00, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv arcp float 4.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[ARG]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv arcp float undef, [[TMP10]]
+; CHECK-NEXT:    [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; CHECK-NEXT:    ret <4 x float> [[PARTIAL_RCP]]
+;
+  %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %arg
+  ret <4 x float> %partial.rcp
+}
+
+; Make sure we don't crash if a vector square root has a constant vecctor input
+define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float> %x) {
+; IEEE-LABEL: define <4 x float> @rsq_f32_vector_const_denom
+; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    [[SQRT:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
+; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
+; IEEE-NEXT:    [[TMP2:%.*]] = fdiv float 1.000000e+00, [[TMP1]]
+; IEEE-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; IEEE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
+; IEEE-NEXT:    [[TMP5:%.*]] = fdiv float -1.000000e+00, [[TMP4]]
+; IEEE-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; IEEE-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
+; IEEE-NEXT:    [[TMP8:%.*]] = fdiv float undef, [[TMP7]]
+; IEEE-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; IEEE-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
+; IEEE-NEXT:    [[TMP11:%.*]] = fdiv float 2.000000e+00, [[TMP10]]
+; IEEE-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i64 3
+; IEEE-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+; DAZ-LABEL: define <4 x float> @rsq_f32_vector_const_denom
+; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[SQRT:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
+; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
+; DAZ-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
+; DAZ-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
+; DAZ-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
+; DAZ-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP5]])
+; DAZ-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP6]], i64 1
+; DAZ-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
+; DAZ-NEXT:    [[TMP9:%.*]] = fdiv float undef, [[TMP8]]
+; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP9]], i64 2
+; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
+; DAZ-NEXT:    [[TMP12:%.*]] = fdiv float 2.000000e+00, [[TMP11]]
+; DAZ-NEXT:    [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP12]], i64 3
+; DAZ-NEXT:    ret <4 x float> [[PARTIAL_RSQ]]
+;
+  %sqrt = call <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float undef>), !fpmath !2
+  %partial.rsq = fdiv <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, %sqrt, !fpmath !2
+  ret <4 x float> %partial.rsq
+}
+
+define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float> %x) {
+; IEEE-LABEL: define <4 x float> @fdiv_constant_f32_vector
+; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; IEEE-NEXT:    ret <4 x float> <float 2.000000e+00, float -5.000000e-01, float 0x7FF8000000000000, float 0x3FC99999A0000000>
+;
+; DAZ-LABEL: define <4 x float> @fdiv_constant_f32_vector
+; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; DAZ-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.rcp.f32(float 5.000000e-01)
+; DAZ-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
+; DAZ-NEXT:    [[TMP3:%.*]] = call float @llvm.amdgcn.rcp.f32(float -2.000000e+00)
+; DAZ-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i64 1
+; DAZ-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float 0x7FF8000000000000, i64 2
+; DAZ-NEXT:    [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP5]], float 0x3FC99999A0000000, i64 3
+; DAZ-NEXT:    ret <4 x float> [[CONST_PARTIAL_RCP]]
+;
+  %const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
+  ret <4 x float> %const.partial.rcp
+}
+
 declare float @llvm.sqrt.f32(float)
 declare float @llvm.fabs.f32(float)
 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 declare void @llvm.assume(i1 noundef)
 
 attributes #0 = { optnone noinline }


        


More information about the llvm-commits mailing list