[llvm] r353557 - [DAGCombine] Optimize pow(X, 0.75) to sqrt(X) * sqrt(sqrt(X))

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 8 11:50:58 PST 2019


Author: nemanjai
Date: Fri Feb  8 11:50:58 2019
New Revision: 353557

URL: http://llvm.org/viewvc/llvm-project?rev=353557&view=rev
Log:
[DAGCombine] Optimize pow(X, 0.75) to sqrt(X) * sqrt(sqrt(X))

The sqrt-based expansion is faster than a pow call, and we already do this
when the exponent is 0.25. This adds the 0.75 case which, unlike the 0.25
case, is not sensitive to signed zeros.

Patch by Whitney Tsang (Whitney)

Differential revision: https://reviews.llvm.org/D57434

Added:
    llvm/trunk/test/CodeGen/AArch64/pow.75.ll
    llvm/trunk/test/CodeGen/ARM/pow.75.ll
    llvm/trunk/test/CodeGen/PowerPC/pow.75.ll
    llvm/trunk/test/CodeGen/X86/pow.75.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=353557&r1=353556&r2=353557&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb  8 11:50:58 2019
@@ -11912,18 +11912,24 @@ SDValue DAGCombiner::visitFPOW(SDNode *N
     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
   }
 
-  // Try to convert x ** (1/4) into square roots.
+  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
   // TODO: This could be extended (using a target hook) to handle smaller
   // power-of-2 fractional exponents.
-  if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+  if (ExponentIs025 || ExponentIs075) {
     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
+    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
     // For regular numbers, rounding may cause the results to differ.
     // Therefore, we require { nsz ninf afn } for this transform.
     // TODO: We could select out the special cases if we don't have nsz/ninf.
     SDNodeFlags Flags = N->getFlags();
-    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+
+    // We only need no signed zeros for the 0.25 case.
+    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
         !Flags.hasApproximateFuncs())
       return SDValue();
 
@@ -11939,7 +11945,11 @@ SDValue DAGCombiner::visitFPOW(SDNode *N
     // pow(X, 0.25) --> sqrt(sqrt(X))
     SDLoc DL(N);
     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
-    return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+    if (ExponentIs025)
+      return SqrtSqrt;
+    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
   }
 
   return SDValue();

Added: llvm/trunk/test/CodeGen/AArch64/pow.75.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/pow.75.ll?rev=353557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/pow.75.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/pow.75.ll Fri Feb  8 11:50:58 2019
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- -debug 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare float @llvm.pow.f32(float, float)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare double @llvm.pow.f64(double, double)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+
+define float @pow_f32_three_fourth_fmf(float %x) nounwind {
+; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01)
+  ret float %r
+}
+
+define double @pow_f64_three_fourth_fmf(double %x) nounwind {
+; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01)
+  ret double %r
+}
+
+define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind {
+; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-1, float 7.5e-1, float 7.5e-01, float 7.5e-01>)
+  ret <4 x float> %r
+}
+
+define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind {
+; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-1, double 7.5e-1>)
+  ret <2 x double> %r
+}

Added: llvm/trunk/test/CodeGen/ARM/pow.75.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/pow.75.ll?rev=353557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/pow.75.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/pow.75.ll Fri Feb  8 11:50:58 2019
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon -debug 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare float @llvm.pow.f32(float, float)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare double @llvm.pow.f64(double, double)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+
+define float @pow_f32_three_fourth_fmf(float %x) nounwind {
+; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01)
+  ret float %r
+}
+
+define double @pow_f64_three_fourth_fmf(double %x) nounwind {
+; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01)
+  ret double %r
+}
+
+define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind {
+; CHECK: Combining: {{.*}}: v4f32 = BUILD_VECTOR [[FORTH:t[0-9]+]], [[THIRD:t[0-9]+]], [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]]
+; CHECK: Combining: [[FIRST]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK: Combining: [[SECOND]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK: Combining: [[THIRD]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK: Combining: [[FORTH]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-1, float 7.5e-1, float 7.5e-01, float 7.5e-01>)
+  ret <4 x float> %r
+}
+
+define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind {
+; CHECK: Combining: {{.*}}: v2f64 = BUILD_VECTOR [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]]
+; CHECK: Combining: [[FIRST]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK: Combining: [[SECOND]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-1, double 7.5e-1>)
+  ret <2 x double> %r
+}

Added: llvm/trunk/test/CodeGen/PowerPC/pow.75.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/pow.75.ll?rev=353557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pow.75.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/pow.75.ll Fri Feb  8 11:50:58 2019
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -debug 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare float @llvm.pow.f32(float, float)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare double @llvm.pow.f64(double, double)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+
+define float @pow_f32_three_fourth_fmf(float %x) nounwind {
+; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01)
+  ret float %r
+}
+
+define double @pow_f64_three_fourth_fmf(double %x) nounwind {
+; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01)
+  ret double %r
+}
+
+define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind {
+; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-1, float 7.5e-1, float 7.5e-01, float 7.5e-01>)
+  ret <4 x float> %r
+}
+
+define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind {
+; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-1, double 7.5e-1>)
+  ret <2 x double> %r
+}

Added: llvm/trunk/test/CodeGen/X86/pow.75.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pow.75.ll?rev=353557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pow.75.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pow.75.ll Fri Feb  8 11:50:58 2019
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -debug 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare float @llvm.pow.f32(float, float)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare double @llvm.pow.f64(double, double)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+
+define float @pow_f32_three_fourth_fmf(float %x) nounwind {
+; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01)
+  ret float %r
+}
+
+define double @pow_f64_three_fourth_fmf(double %x) nounwind {
+; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01)
+  ret double %r
+}
+
+define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind {
+; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-1, float 7.5e-1, float 7.5e-01, float 7.5e-01>)
+  ret <4 x float> %r
+}
+
+define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind {
+; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
+; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
+; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
+; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-NEXT:  ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+  %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-1, double 7.5e-1>)
+  ret <2 x double> %r
+}




More information about the llvm-commits mailing list