[llvm] b86a1cd - [PowerPC] dyn_cast should be dyn_cast_or_null in MASSV pass

Masoud Ataei via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 24 08:22:03 PST 2020


Author: Masoud Ataei
Date: 2020-11-24T16:21:12Z
New Revision: b86a1cd2f8540b311b5b921235e612fea4134dff

URL: https://github.com/llvm/llvm-project/commit/b86a1cd2f8540b311b5b921235e612fea4134dff
DIFF: https://github.com/llvm/llvm-project/commit/b86a1cd2f8540b311b5b921235e612fea4134dff.diff

LOG: [PowerPC] dyn_cast should be dyn_cast_or_null in MASSV pass

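The second operand (the exponent) of a vector pow call can be a constant
vector whose lanes hold different values. In that case
Exp->getSplatValue() returns nullptr, which dyn_cast does not accept.
Use dyn_cast_or_null so that a non-splat constant exponent is handled
and the call is simply left as a regular MASSV pow call.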

Reviewed By: steven.zhang, PowerPC

Differential Revision: https://reviews.llvm.org/D91729

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
    llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
    llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
index a61e1f83705e..27b2c9a628d0 100644
--- a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -105,7 +105,7 @@ bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func,
     return false;
 
   if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1)))
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(Exp->getSplatValue())) {
+    if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Exp->getSplatValue())) {
       // If the argument is 0.75 or 0.25 it is cheaper to turn it into pow
       // intrinsic so that it could be optimzed as sequence of sqrt's.
       if (!CI->hasNoInfs() || !CI->hasApproxFunc())

diff --git a/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll b/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
index 43b0477b9a74..f4c3a149bfcd 100644
--- a/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
+++ b/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
 
 ; Exponent is a variable
-define void @my_vpow_var(double* nocapture %z, double* nocapture readonly %y, double* nocapture readonly %x) {
-; CHECK-LABEL:       @vspow_var
+define void @vpow_var(double* nocapture %z, double* nocapture readonly %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_var
 ; CHECK-PWR9:        bl __powd2_P9
 ; CHECK-PWR8:        bl __powd2_P8
 ; CHECK:             blr
@@ -31,8 +31,8 @@ for.end:
 }
 
 ; Exponent is a constant != 0.75 and !=0.25
-define void @my_vpow_const(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL:       @vspow_const
+define void @vpow_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_const
 ; CHECK-PWR9:        bl __powd2_P9
 ; CHECK-PWR8:        bl __powd2_P8
 ; CHECK:             blr
@@ -56,9 +56,87 @@ for.end:
   ret void
 }
 
+; Exponent is a constant != 0.75 and !=0.25 and they are different
+define void @vpow_noeq_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_noeq_const
+; CHECK-PWR9:        bl __powd2_P9
+; CHECK-PWR8:        bl __powd2_P8
+; CHECK:             blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr double, double* %y, i64 %index
+  %next.gep19 = getelementptr double, double* %x, i64 %index
+  %0 = bitcast double* %next.gep19 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %0, align 8
+  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.600000e-01>)
+  %2 = bitcast double* %next.gep to <2 x double>*
+  store <2 x double> %1, <2 x double>* %2, align 8
+  %index.next = add i64 %index, 2
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and !=0.25 and they are different
+define void @vpow_noeq075_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_noeq075_const
+; CHECK-PWR9:        bl __powd2_P9
+; CHECK-PWR8:        bl __powd2_P8
+; CHECK:             blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr double, double* %y, i64 %index
+  %next.gep19 = getelementptr double, double* %x, i64 %index
+  %0 = bitcast double* %next.gep19 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %0, align 8
+  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.500000e-01>)
+  %2 = bitcast double* %next.gep to <2 x double>*
+  store <2 x double> %1, <2 x double>* %2, align 8
+  %index.next = add i64 %index, 2
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and !=0.25 and they are different
+define void @vpow_noeq025_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_noeq025_const
+; CHECK-PWR9:        bl __powd2_P9
+; CHECK-PWR8:        bl __powd2_P8
+; CHECK:             blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr double, double* %y, i64 %index
+  %next.gep19 = getelementptr double, double* %x, i64 %index
+  %0 = bitcast double* %next.gep19 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %0, align 8
+  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 2.500000e-01>)
+  %2 = bitcast double* %next.gep to <2 x double>*
+  store <2 x double> %1, <2 x double>* %2, align 8
+  %index.next = add i64 %index, 2
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
 ; Exponent is 0.75
-define void @my_vpow_075(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL:       @vspow_075
+define void @vpow_075(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_075
 ; CHECK-NOT:         bl __powd2_P{{[8,9]}}
 ; CHECK:             xvrsqrtesp
 ; CHECK:             blr
@@ -83,8 +161,8 @@ for.end:
 }
 
 ; Exponent is 0.25
-define void @my_vpow_025(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL:       @vspow_025
+define void @vpow_025(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_025
 ; CHECK-NOT:         bl __powd2_P{{[8,9]}}
 ; CHECK:             xvrsqrtesp
 ; CHECK:             blr
@@ -109,8 +187,8 @@ for.end:
 }
 
 ; Exponent is 0.75 but no proper fast-math flags
-define void @my_vpow_075_nofast(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL:       @vspow_075_nofast
+define void @vpow_075_nofast(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_075_nofast
 ; CHECK-PWR9:        bl __powd2_P9
 ; CHECK-PWR8:        bl __powd2_P8
 ; CHECK-NOT:         xvrsqrtesp
@@ -136,8 +214,8 @@ for.end:
 }
 
 ; Exponent is 0.25 but no proper fast-math flags
-define void @my_vpow_025_nofast(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL:       @vspow_025_nofast
+define void @vpow_025_nofast(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL:       @vpow_025_nofast
 ; CHECK-PWR9:        bl __powd2_P9
 ; CHECK-PWR8:        bl __powd2_P8
 ; CHECK-NOT:         xvrsqrtesp

diff --git a/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll b/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll
index f251225975d3..caa247acc2b6 100644
--- a/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll
+++ b/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll
@@ -56,6 +56,84 @@ for.end:
   ret void
 }
 
+; Exponent is a constant != 0.75 and !=0.25 and they are different
+define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x)  {
+; CHECK-LABEL:       @vspow_neq_const
+; CHECK-PWR9:        bl __powf4_P9
+; CHECK-PWR8:        bl __powf4_P8
+; CHECK:             blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and !=0.25
+define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x)  {
+; CHECK-LABEL:       @vspow_neq075_const
+; CHECK-PWR9:        bl __powf4_P9
+; CHECK-PWR8:        bl __powf4_P8
+; CHECK:             blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and !=0.25
+define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x)  {
+; CHECK-LABEL:       @vspow_neq025_const
+; CHECK-PWR9:        bl __powf4_P9
+; CHECK-PWR8:        bl __powf4_P8
+; CHECK:             blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
 ; Exponent is 0.75
 define void @vspow_075(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_075


        


More information about the llvm-commits mailing list