[llvm] r264532 - [PowerPC] Map max/minnum intrinsics and fmax/fmin to ISD nodes for CTR-based loop legality

Sat Mar 26 22:40:56 PDT 2016

Author: hfinkel
Date: Sun Mar 27 00:40:56 2016
New Revision: 264532

URL: http://llvm.org/viewvc/llvm-project?rev=264532&view=rev
Log:
[PowerPC] Map max/minnum intrinsics and fmax/fmin to ISD nodes for CTR-based loop legality

Intrinsic::maxnum and Intrinsic::minnum, along with the associated libc
function calls (fmax[f], etc.) generally map to function calls after lowering.
For some vector types with QPX at least, however, we can legally lower these,
and we don't need to prohibit CTR-based loops on their account.

It turned out, however, that the logic that checked the opcodes associated with
intrinsics was broken (it would set the Opcode variable, but that variable was
later checked only if set for some otherwise-external function call).

This fixes the latter problem and adds the FMAX/MINNUM mappings.

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp
    llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp?rev=264532&r1=264531&r2=264532&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCCTRLoops.cpp Sun Mar 27 00:40:56 2016
@@ -245,7 +245,7 @@ bool PPCCTRLoops::mightUseCTR(const Trip
       if (Function *F = CI->getCalledFunction()) {
         // Most intrinsics don't become function calls, but some might.
         // sin, cos, exp and log are always calls.
-        unsigned Opcode;
+        unsigned Opcode = 0;
         if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
           switch (F->getIntrinsicID()) {
           default: continue;
@@ -291,8 +291,6 @@ bool PPCCTRLoops::mightUseCTR(const Trip
           case Intrinsic::pow:
           case Intrinsic::sin:
           case Intrinsic::cos:
-          case Intrinsic::maxnum:
-          case Intrinsic::minnum:
             return true;
           case Intrinsic::copysign:
             if (CI->getArgOperand(0)->getType()->getScalarType()->
@@ -307,6 +305,8 @@ bool PPCCTRLoops::mightUseCTR(const Trip
           case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
           case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
           case Intrinsic::round:     Opcode = ISD::FROUND;     break;
+          case Intrinsic::minnum:    Opcode = ISD::FMINNUM;    break;
+          case Intrinsic::maxnum:    Opcode = ISD::FMAXNUM;    break;
           }
         }
 
@@ -366,8 +366,18 @@ bool PPCCTRLoops::mightUseCTR(const Trip
           case LibFunc::truncf:
           case LibFunc::truncl:
             Opcode = ISD::FTRUNC; break;
+          case LibFunc::fmin:
+          case LibFunc::fminf:
+          case LibFunc::fminl:
+            Opcode = ISD::FMINNUM; break;
+          case LibFunc::fmax:
+          case LibFunc::fmaxf:
+          case LibFunc::fmaxl:
+            Opcode = ISD::FMAXNUM; break;
           }
+        }
 
+        if (Opcode) {
           auto &DL = CI->getModule()->getDataLayout();
           MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
                                             true);

Modified: llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll?rev=264532&r1=264531&r2=264532&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll Sun Mar 27 00:40:56 2016
@@ -1,5 +1,23 @@
-; RUN: llc < %s | FileCheck %s
-target triple = "powerpc64le-unknown-linux-gnu"
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mcpu=a2q < %s | FileCheck %s --check-prefix=QPX
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare float @fabsf(float)
+
+declare float @fminf(float, float)
+declare double @fmin(double, double)
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+
+declare float @fmaxf(float, float)
+declare double @fmax(double, double)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
+
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
 
 define void @test1(float %f, float* %fp) {
 entry:
@@ -18,8 +36,53 @@ loop_exit:
 }
 
 ; CHECK-LABEL: test1:
+; CHECK-NOT: mtctr
+; CHECK: bl fminf
+
+define void @test1v(<4 x float> %f, <4 x float>* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call <4 x float> @llvm.minnum.v4f32(<4 x float> %f, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
+  store <4 x float> %0, <4 x float>* %fp, align 16
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test1v:
+; CHECK-NOT: mtctr
 ; CHECK: bl fminf
 
+; QPX-LABEL: test1v:
+; QPX: mtctr
+; QPX-NOT: bl fminf
+; QPX: blr
+
+define void @test1a(float %f, float* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call float @fminf(float %f, float 1.0) readnone
+  store float %0, float* %fp, align 4
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test1a:
+; CHECK-NOT: mtctr
+; CHECK: bl fminf
 
 define void @test2(float %f, float* %fp) {
 entry:
@@ -38,7 +101,131 @@ loop_exit:
 }
 
 ; CHECK-LABEL: test2:
+; CHECK-NOT: mtctr
 ; CHECK: bl fmaxf
 
-declare float @llvm.minnum.f32(float, float)
-declare float @llvm.maxnum.f32(float, float)
+define void @test2v(<4 x double> %f, <4 x double>* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %f, <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>)
+  store <4 x double> %0, <4 x double>* %fp, align 16
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test2v:
+; CHECK-NOT: mtctr
+; CHECK: bl fmax
+
+; QPX-LABEL: test2v:
+; QPX: mtctr
+; QPX-NOT: bl fmax
+; QPX: blr
+
+define void @test2a(float %f, float* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call float @fmaxf(float %f, float 1.0) readnone
+  store float %0, float* %fp, align 4
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test2a:
+; CHECK-NOT: mtctr
+; CHECK: bl fmaxf
+
+define void @test3(double %f, double* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call double @llvm.minnum.f64(double %f, double 1.0)
+  store double %0, double* %fp, align 8
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test3:
+; CHECK-NOT: mtctr
+; CHECK: bl fmin
+
+define void @test3a(double %f, double* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call double @fmin(double %f, double 1.0) readnone
+  store double %0, double* %fp, align 8
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test3a:
+; CHECK-NOT: mtctr
+; CHECK: bl fmin
+
+define void @test4(double %f, double* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call double @llvm.maxnum.f64(double %f, double 1.0)
+  store double %0, double* %fp, align 8
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test4:
+; CHECK-NOT: mtctr
+; CHECK: bl fmax
+
+define void @test4a(double %f, double* %fp) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %1, %loop_body ]
+  %0 = call double @fmax(double %f, double 1.0) readnone
+  store double %0, double* %fp, align 8
+  %1 = add i64 %invar_address.dim.0.01, 1
+  %2 = icmp eq i64 %1, 2
+  br i1 %2, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
+; CHECK-LABEL: test4a:
+; CHECK-NOT: mtctr
+; CHECK: bl fmax
+