[llvm] r364714 - [InstCombine] canonicalize fmin/fmax to LLVM intrinsics minnum/maxnum

Sat Jun 29 07:28:54 PDT 2019

Author: spatel
Date: Sat Jun 29 07:28:54 2019
New Revision: 364714

URL: http://llvm.org/viewvc/llvm-project?rev=364714&view=rev
Log:
[InstCombine] canonicalize fmin/fmax to LLVM intrinsics minnum/maxnum

This transform came up in D62414, but we should deal with it first.
We have LLVM intrinsics that correspond exactly to these libm calls (unlike
most libm calls, fmin/fmax never set errno).
This holds without any fast-math-flags, so we should always canonicalize
to those intrinsics directly for better optimization.
Currently, we convert to fcmp+select only when we have FMF (nnan) because
fcmp+select does not preserve the semantics of the call in the general case.

Differential Revision: https://reviews.llvm.org/D63214

Modified:
    llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll
    llvm/trunk/test/Transforms/InstCombine/fast-math.ll
    llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp?rev=364714&r1=364713&r2=364714&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp Sat Jun 29 07:28:54 2019
@@ -1563,40 +1563,30 @@ Value *LibCallSimplifier::optimizeExp2(C
 }
 
 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
   // If we can shrink the call to a float function rather than a double
   // function, do that first.
+  Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
     if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
       return Ret;
 
+  // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+  // the intrinsics for improved optimization (for example, vectorization).
+  // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+  // From the C standard draft WG14/N1256:
+  // "Ideally, fmax would be sensitive to the sign of zero, for example
+  // fmax(-0.0, +0.0) would return +0; however, implementation in software
+  // might be impractical."
   IRBuilder<>::FastMathFlagGuard Guard(B);
-  FastMathFlags FMF;
-  if (CI->isFast()) {
-    // If the call is 'fast', then anything we create here will also be 'fast'.
-    FMF.setFast();
-  } else {
-    // At a minimum, no-nans-fp-math must be true.
-    if (!CI->hasNoNaNs())
-      return nullptr;
-    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
-    // "Ideally, fmax would be sensitive to the sign of zero, for example
-    // fmax(-0. 0, +0. 0) would return +0; however, implementation in software
-    // might be impractical."
-    FMF.setNoSignedZeros();
-    FMF.setNoNaNs();
-  }
+  FastMathFlags FMF = CI->getFastMathFlags();
+  FMF.setNoSignedZeros();
   B.setFastMathFlags(FMF);
 
-  // We have a relaxed floating-point environment. We can ignore NaN-handling
-  // and transform to a compare and select. We do not have to consider errno or
-  // exceptions, because fmin/fmax do not have those.
-  Value *Op0 = CI->getArgOperand(0);
-  Value *Op1 = CI->getArgOperand(1);
-  Value *Cmp = Callee->getName().startswith("fmin") ?
-    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
-  return B.CreateSelect(Cmp, Op0, Op1);
+  Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+                                                           : Intrinsic::maxnum;
+  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+  return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
 }
 
 Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {

Modified: llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll?rev=364714&r1=364713&r2=364714&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll Sat Jun 29 07:28:54 2019
@@ -513,7 +513,7 @@ define double @tanh_test2(float %f) {
 ; flags are propagated for shrunken *binary* double FP calls.
 define float @max1(float %a, float %b) {
 ; CHECK-LABEL: @max1(
-; ISC99-NEXT:    [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]])
+; ISC99-NEXT:    [[FMAXF:%.*]] = call nsz arcp float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
 ; ISC99-NEXT:    ret float [[FMAXF]]
 ; ISC89:         [[FMAXF:%.*]] = call arcp double @fmax(double [[A:%.*]], double [[B:%.*]])
 ;
@@ -524,14 +524,15 @@ define float @max1(float %a, float %b) {
   ret float %f
 }
 
-; A function can have a name that matches a common libcall,
-; but with the wrong type(s). Let it be.
+; This is treated as libm 'fmin' - LLVM types do not necessarily
+; correspond to 'C' types, so this is not required to be "fminl".
 
 define float @fake_fmin(float %a, float %b) {
 ; CHECK-LABEL: @fake_fmin(
 ; CHECK-NEXT:    [[C:%.*]] = fpext float [[A:%.*]] to fp128
 ; CHECK-NEXT:    [[D:%.*]] = fpext float [[B:%.*]] to fp128
-; CHECK-NEXT:    [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
+; ISC99-NEXT:    [[E:%.*]] = call nsz fp128 @llvm.minnum.f128(fp128 [[C]], fp128 [[D]])
+; ISC89-NEXT:    [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
 ; CHECK-NEXT:    [[F:%.*]] = fptrunc fp128 [[E]] to float
 ; CHECK-NEXT:    ret float [[F]]
 ;
@@ -542,7 +543,7 @@ define float @fake_fmin(float %a, float
   ret float %f
 }
 
-declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
+declare fp128 @fmin(fp128, fp128)
 
 declare double @fmax(double, double)
 

Modified: llvm/trunk/test/Transforms/InstCombine/fast-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fast-math.ll?rev=364714&r1=364713&r2=364714&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fast-math.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fast-math.ll Sat Jun 29 07:28:54 2019
@@ -811,17 +811,13 @@ declare float @fminf(float, float)
 declare fp128 @fmaxl(fp128, fp128)
 declare fp128 @fminl(fp128, fp128)
 
-; No NaNs is the minimum requirement to replace these calls.
-; This should always be set when unsafe-fp-math is true, but
-; alternate the attributes for additional test coverage.
 ; 'nsz' is implied by the definition of fmax or fmin itself.
 
-; Shrink and remove the call.
+; Shrink and replace the call.
 define float @max1(float %a, float %b) {
 ; CHECK-LABEL: @max1(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = fpext float %a to double
   %d = fpext float %b to double
@@ -832,8 +828,8 @@ define float @max1(float %a, float %b) {
 
 define float @fmax_no_fmf(float %a, float %b) {
 ; CHECK-LABEL: @fmax_no_fmf(
-; CHECK-NEXT:    [[C:%.*]] = call float @fmaxf(float [[A:%.*]], float [[B:%.*]])
-; CHECK-NEXT:    ret float [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = call float @fmaxf(float %a, float %b)
   ret float %c
@@ -841,9 +837,8 @@ define float @fmax_no_fmf(float %a, floa
 
 define float @max2(float %a, float %b) {
 ; CHECK-LABEL: @max2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = call nnan float @fmaxf(float %a, float %b)
   ret float %c
@@ -852,9 +847,8 @@ define float @max2(float %a, float %b) {
 
 define double @max3(double %a, double %b) {
 ; CHECK-LABEL: @max3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], double [[A]], double [[B]]
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.maxnum.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %c = call fast double @fmax(double %a, double %b)
   ret double %c
@@ -862,9 +856,8 @@ define double @max3(double %a, double %b
 
 define fp128 @max4(fp128 %a, fp128 %b) {
 ; CHECK-LABEL: @max4(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
-; CHECK-NEXT:    ret fp128 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz fp128 @llvm.maxnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
+; CHECK-NEXT:    ret fp128 [[TMP1]]
 ;
   %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
   ret fp128 %c
@@ -873,9 +866,8 @@ define fp128 @max4(fp128 %a, fp128 %b) {
 ; Shrink and remove the call.
 define float @min1(float %a, float %b) {
 ; CHECK-LABEL: @min1(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = fpext float %a to double
   %d = fpext float %b to double
@@ -886,8 +878,8 @@ define float @min1(float %a, float %b) {
 
 define float @fmin_no_fmf(float %a, float %b) {
 ; CHECK-LABEL: @fmin_no_fmf(
-; CHECK-NEXT:    [[C:%.*]] = call float @fminf(float [[A:%.*]], float [[B:%.*]])
-; CHECK-NEXT:    ret float [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = call float @fminf(float %a, float %b)
   ret float %c
@@ -895,9 +887,8 @@ define float @fmin_no_fmf(float %a, floa
 
 define float @min2(float %a, float %b) {
 ; CHECK-LABEL: @min2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = call fast float @fminf(float %a, float %b)
   ret float %c
@@ -905,9 +896,8 @@ define float @min2(float %a, float %b) {
 
 define double @min3(double %a, double %b) {
 ; CHECK-LABEL: @min3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], double [[A]], double [[B]]
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz double @llvm.minnum.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %c = call nnan double @fmin(double %a, double %b)
   ret double %c
@@ -915,9 +905,8 @@ define double @min3(double %a, double %b
 
 define fp128 @min4(fp128 %a, fp128 %b) {
 ; CHECK-LABEL: @min4(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select fast i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
-; CHECK-NEXT:    ret fp128 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast fp128 @llvm.minnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
+; CHECK-NEXT:    ret fp128 [[TMP1]]
 ;
   %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
   ret fp128 %c

Modified: llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll?rev=364714&r1=364713&r2=364714&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll Sat Jun 29 07:28:54 2019
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -356,9 +357,9 @@ define i1 @test14_intrin(float %x, float
 
 define i1 @test15(float %x, float %y, float %z) {
 ; CHECK-LABEL: @test15(
-; CHECK-NEXT:    [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %1 = fpext float %x to double
   %2 = fpext float %y to double
@@ -370,9 +371,9 @@ define i1 @test15(float %x, float %y, fl
 
 define i1 @test16(float %x, float %y, float %z) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %1 = fpext float %z to double
   %2 = fpext float %x to double
@@ -384,9 +385,9 @@ define i1 @test16(float %x, float %y, fl
 
 define i1 @test17(float %x, float %y, float %z) {
 ; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %1 = fpext float %x to double
   %2 = fpext float %y to double
@@ -398,9 +399,9 @@ define i1 @test17(float %x, float %y, fl
 
 define i1 @test18(float %x, float %y, float %z) {
 ; CHECK-LABEL: @test18(
-; CHECK-NEXT:    [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %1 = fpext float %z to double
   %2 = fpext float %x to double
@@ -426,9 +427,9 @@ define i1 @test19(float %x, float %y, fl
 
 define i1 @test20(float %x, float %y) {
 ; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[FMINF:%.*]] = call float @fminf(float 1.000000e+00, float %x) #0
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %y
-; CHECK-NEXT:    ret i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %1 = fpext float %y to double
   %2 = fpext float %x to double
@@ -441,9 +442,9 @@ define i1 @test20(float %x, float %y) {
 
 define i1 @test21(float %x, float %y) {
 ; CHECK-LABEL: @test21(
-; CHECK-NEXT:    [[TMP1:%.*]] = fpext float %y to double
-; CHECK-NEXT:    [[TMP2:%.*]] = fpext float %x to double
-; CHECK-NEXT:    [[TMP3:%.*]] = call double @fmin(double 1.300000e+00, double [[TMP2]]) #2
+; CHECK-NEXT:    [[TMP1:%.*]] = fpext float [[Y:%.*]] to double
+; CHECK-NEXT:    [[TMP2:%.*]] = fpext float [[X:%.*]] to double
+; CHECK-NEXT:    [[TMP3:%.*]] = call nsz double @llvm.minnum.f64(double [[TMP2]], double 1.300000e+00)
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq double [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;