[llvm] ec146cb - [LV] Add support for minimum/maximum intrinsics

Tue Jun 20 10:17:32 PDT 2023

Author: Anna Thomas
Date: 2023-06-20T13:17:28-04:00
New Revision: ec146cb7c0b4a162ee73463e6c7bb306b99e013b

URL: https://github.com/llvm/llvm-project/commit/ec146cb7c0b4a162ee73463e6c7bb306b99e013b
DIFF: https://github.com/llvm/llvm-project/commit/ec146cb7c0b4a162ee73463e6c7bb306b99e013b.diff

LOG: [LV] Add support for minimum/maximum intrinsics

{mini|maxi}mum intrinsics are different from {min|max}num intrinsics in
the propagation of NaN and signed zero. Also, the minnum/maxnum
intrinsics require the presence of nsz flags to be valid reductions in
vectorizer. In this regard, we introduce a new recurrence kind and also
add support for identifying reduction patterns using these intrinsics.

The reduction intrinsics and lowering was introduced here: 26bfbec5d2.

There are tests added which show how this interacts across chains of
min/max patterns.

Differential Revision: https://reviews.llvm.org/D151482

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/IVDescriptors.h
    llvm/lib/Analysis/IVDescriptors.cpp
    llvm/lib/Transforms/Utils/LoopUtils.cpp
    llvm/test/Transforms/LoopVectorize/minmax_reduction.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index d8da0b86ec511..42826cd4e660a 100644

--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
   FMul,       ///< Product of floats.
   FMin,       ///< FP min implemented in terms of select(cmp()).
   FMax,       ///< FP max implemented in terms of select(cmp()).
+  FMinimum,   ///< FP min with llvm.minimum semantics
+  FMaximum,   ///< FP max with llvm.maximum semantics
   FMulAdd,    ///< Fused multiply-add of floats (a * b + c).
   SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
               ///< invariant
@@ -223,7 +225,8 @@ class RecurrenceDescriptor {
 
   /// Returns true if the recurrence kind is a floating-point min/max kind.
   static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
-    return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
+    return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+           Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
   }
 
   /// Returns true if the recurrence kind is any min/max kind.

diff  --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 3216fad8a8ad1..ce3a70b0492c7 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -706,6 +706,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
     return InstDesc(Kind == RecurKind::FMin, I);
   if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
     return InstDesc(Kind == RecurKind::FMax, I);
+  if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
+    return InstDesc(Kind == RecurKind::FMinimum, I);
+  if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
+    return InstDesc(Kind == RecurKind::FMaximum, I);
 
   return InstDesc(false, I);
 }
@@ -801,11 +805,18 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
   case Instruction::Call:
     if (isSelectCmpRecurrenceKind(Kind))
       return isSelectCmpPattern(L, OrigPhi, I, Prev);
+    auto HasRequiredFMF = [&]() {
+     if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
+       return true;
+     if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
+       return true;
+     // minimum and maximum intrinsics do not require nsz and nnan flags since
+     // NaN and signed zeroes are propagated in the intrinsic implementation.
+     return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
+            match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
+    };
     if (isIntMinMaxRecurrenceKind(Kind) ||
-        (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
-          (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
-           I->hasNoSignedZeros())) &&
-         isFPMinMaxRecurrenceKind(Kind)))
+        (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
       return isMinMaxPattern(I, Kind, Prev);
     else if (isFMulAddIntrinsic(I))
       return InstDesc(Kind == RecurKind::FMulAdd, I,
@@ -923,6 +934,16 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
     LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
     return true;
   }
+  if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
+    LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
+    return true;
+  }
+  if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
+    LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
+    return true;
+  }
   // Not a reduction of known type.
   return false;
 }
@@ -1063,6 +1084,10 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
     assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
            "nnan, nsz is expected to be set for FP max reduction.");
     return ConstantFP::getInfinity(Tp, true /*Negative*/);
+  case RecurKind::FMinimum:
+    return ConstantFP::getInfinity(Tp, false /*Negative*/);
+  case RecurKind::FMaximum:
+    return ConstantFP::getInfinity(Tp, true /*Negative*/);
   case RecurKind::SelectICmp:
   case RecurKind::SelectFCmp:
     return getRecurrenceStartValue();
@@ -1097,6 +1122,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
     return Instruction::ICmp;
   case RecurKind::FMax:
   case RecurKind::FMin:
+  case RecurKind::FMaximum:
+  case RecurKind::FMinimum:
   case RecurKind::SelectFCmp:
     return Instruction::FCmp;
   default:

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 90c9396e11bd6..2dfaea7e6680e 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -909,6 +909,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
     return Intrinsic::minnum;
   case RecurKind::FMax:
     return Intrinsic::maxnum;
+  case RecurKind::FMinimum:
+    return Intrinsic::minimum;
+  case RecurKind::FMaximum:
+    return Intrinsic::maximum;
   }
 }
 
@@ -928,6 +932,9 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
     return CmpInst::FCMP_OLT;
   case RecurKind::FMax:
     return CmpInst::FCMP_OGT;
+  // We do not add FMinimum/FMaximum recurrence kind here since there is no
+  // equivalent predicate which compares signed zeroes according to the
+  // semantics of the intrinsics (llvm.minimum/maximum).
   }
 }
 
@@ -943,7 +950,8 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
 Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                             Value *Right) {
   Type *Ty = Left->getType();
-  if (Ty->isIntOrIntVectorTy()) {
+  if (Ty->isIntOrIntVectorTy() ||
+      (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
     // TODO: Add float minnum/maxnum support when FMF nnan is set.
     Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
     return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
@@ -1094,6 +1102,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
     return Builder.CreateFPMaxReduce(Src);
   case RecurKind::FMin:
     return Builder.CreateFPMinReduce(Src);
+  case RecurKind::FMinimum:
+    return Builder.CreateFPMinimumReduce(Src);
+  case RecurKind::FMaximum:
+    return Builder.CreateFPMaximumReduce(Src);
   default:
     llvm_unreachable("Unhandled opcode");
   }

diff  --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index 65c04ca1a89ee..85a90f2e04c5e 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
+; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=2  < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -1090,6 +1090,120 @@ for.body:                                         ; preds = %entry, %for.body
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
+; CHECK-LABEL: fmaximum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call float @llvm.vector.reduce.fmaximum.v2f32
+define float @fmaximum_intrinsic(ptr nocapture readonly %x) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %1
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call float @llvm.maximum.f32(float %s.011, float %0)
+  %inc = add nuw nsw i32 %i.012, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_intrinsic(ptr nocapture readonly %x) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %1
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call float @llvm.minimum.f32(float %s.011, float %0)
+  %inc = add nuw nsw i32 %i.012, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %cond9
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+  %0 = load float, ptr %arrayidx, align 4
+  %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0)
+  %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+  %1 = load float, ptr %arrayidx3, align 4
+  %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+  %inc = add nuw nsw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum_one_with_flags
+; CHECK-LABEL: vector.body:
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %cond9
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+  %0 = load float, ptr %arrayidx, align 4
+  %s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0)
+  %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+  %1 = load float, ptr %arrayidx3, align 4
+  %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+  %inc = add nuw nsw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 ; Make sure any check-not directives are not triggered by function declarations.
 ; CHECK: declare
 
@@ -1099,6 +1213,8 @@ declare i32 @llvm.umin.i32(i32, i32)
 declare i32 @llvm.umax.i32(i32, i32)
 declare float @llvm.minnum.f32(float, float)
 declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.minimum.f32(float, float)
+declare float @llvm.maximum.f32(float, float)
 
 attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
 attributes #1 = { "no-nans-fp-math"="true" }