[llvm] 65a3de9 - [X86] Pre-commit tests to show the problem of SQRT when `RefinementSteps` = 0. NFC

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 30 19:54:00 PST 2021


Author: Phoebe Wang
Date: 2021-12-01T11:53:44+08:00
New Revision: 65a3de91ab3e682b732c830e555339269d002295

URL: https://github.com/llvm/llvm-project/commit/65a3de91ab3e682b732c830e555339269d002295
DIFF: https://github.com/llvm/llvm-project/commit/65a3de91ab3e682b732c830e555339269d002295.diff

LOG: [X86] Pre-commit tests to show the problem of SQRT when `RefinementSteps` = 0. NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
    llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
    llvm/test/CodeGen/X86/sqrt-fastmath.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
index 04ad68fdc893f..34f13911362ae 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -120,6 +120,20 @@ define half @test_sqrt_sh2(half %a0, half %a1) {
   ret half %2
 }
 
+define half @test_sqrt_sh3(half %a0, half %a1) {
+; CHECK-LABEL: test_sqrt_sh3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; CHECK-NEXT:    vcmpltsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
+; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %1 = call fast half @llvm.sqrt.f16(half %a0)
+  ret half %1
+}
+
 declare half @llvm.sqrt.f16(half)
 
 define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {

diff  --git a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
index 4a133e108f2c9..d3af80cd2ad26 100644
--- a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
@@ -969,6 +969,18 @@ define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
   ret <8 x half> %2
 }
 
+define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
+; CHECK-LABEL: test_sqrt_ph_128_fast2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; CHECK-NEXT:    vcmpgeph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm1, %k1
+; CHECK-NEXT:    vrsqrtph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
+  ret <8 x half> %1
+}
+
 define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) {
 ; CHECK-LABEL: test_mask_sqrt_ph_128:
 ; CHECK:       # %bb.0:

diff  --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 1346145c190c9..ca0ab0212bf76 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -384,6 +384,37 @@ define float @f32_estimate(float %x) #1 {
   ret float %div
 }
 
+define float @f32_estimate2(float %x) #5 {
+; SSE-LABEL: f32_estimate2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    rsqrtss %xmm0, %xmm1
+; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    andnps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: f32_estimate2:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: f32_estimate2:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX512-NEXT:    vandps %xmm2, %xmm0, %xmm0
+; AVX512-NEXT:    vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %sqrt = tail call fast float @llvm.sqrt.f32(float %x)
+  ret float %sqrt
+}
+
 define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
 ; SSE-LABEL: v4f32_no_estimate:
 ; SSE:       # %bb.0:
@@ -446,6 +477,39 @@ define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
   ret <4 x float> %div
 }
 
+define <4 x float> @v4f32_estimate2(<4 x float> %x) #5 {
+; SSE-LABEL: v4f32_estimate2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    rsqrtps %xmm0, %xmm2
+; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; SSE-NEXT:    cmpleps %xmm0, %xmm1
+; SSE-NEXT:    andps %xmm2, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: v4f32_estimate2:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; AVX1-NEXT:    vcmpleps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vrsqrtps %xmm0, %xmm0
+; AVX1-NEXT:    vandps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: v4f32_estimate2:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; AVX512-NEXT:    vcmpleps %xmm1, %xmm2, %xmm1
+; AVX512-NEXT:    vrsqrtps %xmm0, %xmm0
+; AVX512-NEXT:    vandps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %sqrt = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
+  ret <4 x float> %sqrt
+}
+
 define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
 ; SSE-LABEL: v8f32_no_estimate:
 ; SSE:       # %bb.0:
@@ -1020,3 +1084,4 @@ attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt"
 attributes #2 = { nounwind readnone }
 attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" }
 attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #5 = { "unsafe-fp-math"="true" "reciprocal-estimates"="all:0" }


        


More information about the llvm-commits mailing list