[llvm] ec146cb - [LV] Add support for minimum/maximum intrinsics
Anna Thomas via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 20 10:17:32 PDT 2023
Author: Anna Thomas
Date: 2023-06-20T13:17:28-04:00
New Revision: ec146cb7c0b4a162ee73463e6c7bb306b99e013b
URL: https://github.com/llvm/llvm-project/commit/ec146cb7c0b4a162ee73463e6c7bb306b99e013b
DIFF: https://github.com/llvm/llvm-project/commit/ec146cb7c0b4a162ee73463e6c7bb306b99e013b.diff
LOG: [LV] Add support for minimum/maximum intrinsics
{mini|maxi}mum intrinsics are different from {min|max}num intrinsics in
the propagation of NaN and signed zero. Also, the minnum/maxnum
intrinsics require the presence of both the nnan and nsz flags to be
valid reductions in the vectorizer, whereas {mini|maxi}mum do not. To
handle this, we introduce new recurrence kinds (FMinimum and FMaximum)
and add support for identifying reduction patterns using these intrinsics.
The reduction intrinsics and their lowering were introduced in 26bfbec5d2.
There are tests added which show how this interacts across chains of
min/max patterns.
Differential Revision: https://reviews.llvm.org/D151482
Added:
Modified:
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index d8da0b86ec511..42826cd4e660a 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
+ FMinimum, ///< FP min with llvm.minimum semantics
+ FMaximum, ///< FP max with llvm.maximum semantics
FMulAdd, ///< Fused multiply-add of floats (a * b + c).
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
///< invariant
@@ -223,7 +225,8 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
- return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
+ return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+ Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
}
/// Returns true if the recurrence kind is any min/max kind.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 3216fad8a8ad1..ce3a70b0492c7 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -706,6 +706,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
+ if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMinimum, I);
+ if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMaximum, I);
return InstDesc(false, I);
}
@@ -801,11 +805,18 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
case Instruction::Call:
if (isSelectCmpRecurrenceKind(Kind))
return isSelectCmpPattern(L, OrigPhi, I, Prev);
+ auto HasRequiredFMF = [&]() {
+ if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
+ return true;
+ if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
+ return true;
+ // minimum and maximum intrinsics do not require nsz and nnan flags since
+ // NaN and signed zeroes are propagated in the intrinsic implementation.
+ return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
+ match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
+ };
if (isIntMinMaxRecurrenceKind(Kind) ||
- (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
- (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
- I->hasNoSignedZeros())) &&
- isFPMinMaxRecurrenceKind(Kind)))
+ (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
@@ -923,6 +934,16 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
+ LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
+ LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
@@ -1063,6 +1084,10 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
"nnan, nsz is expected to be set for FP max reduction.");
return ConstantFP::getInfinity(Tp, true /*Negative*/);
+ case RecurKind::FMinimum:
+ return ConstantFP::getInfinity(Tp, false /*Negative*/);
+ case RecurKind::FMaximum:
+ return ConstantFP::getInfinity(Tp, true /*Negative*/);
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
return getRecurrenceStartValue();
@@ -1097,6 +1122,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
+ case RecurKind::FMaximum:
+ case RecurKind::FMinimum:
case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 90c9396e11bd6..2dfaea7e6680e 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -909,6 +909,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
return Intrinsic::minnum;
case RecurKind::FMax:
return Intrinsic::maxnum;
+ case RecurKind::FMinimum:
+ return Intrinsic::minimum;
+ case RecurKind::FMaximum:
+ return Intrinsic::maximum;
}
}
@@ -928,6 +932,9 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
return CmpInst::FCMP_OLT;
case RecurKind::FMax:
return CmpInst::FCMP_OGT;
+ // We do not add FMinimum/FMaximum recurrence kind here since there is no
+ // equivalent predicate which compares signed zeroes according to the
+ // semantics of the intrinsics (llvm.minimum/maximum).
}
}
@@ -943,7 +950,8 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
- if (Ty->isIntOrIntVectorTy()) {
+ if (Ty->isIntOrIntVectorTy() ||
+ (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
// TODO: Add float minnum/maxnum support when FMF nnan is set.
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
@@ -1094,6 +1102,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
return Builder.CreateFPMaxReduce(Src);
case RecurKind::FMin:
return Builder.CreateFPMinReduce(Src);
+ case RecurKind::FMinimum:
+ return Builder.CreateFPMinimumReduce(Src);
+ case RecurKind::FMaximum:
+ return Builder.CreateFPMaximumReduce(Src);
default:
llvm_unreachable("Unhandled opcode");
}
diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index 65c04ca1a89ee..85a90f2e04c5e 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
+; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -1090,6 +1090,120 @@ for.body: ; preds = %entry, %for.body
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
+; CHECK-LABEL: fmaximum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call float @llvm.vector.reduce.fmaximum.v2f32
+define float @fmaximum_intrinsic(ptr nocapture readonly %x) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret float %1
+
+for.body: ; preds = %entry, %for.body
+ %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+ %0 = load float, ptr %arrayidx, align 4
+ %1 = tail call float @llvm.maximum.f32(float %s.011, float %0)
+ %inc = add nuw nsw i32 %i.012, 1
+ %exitcond.not = icmp eq i32 %inc, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_intrinsic(ptr nocapture readonly %x) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret float %1
+
+for.body: ; preds = %entry, %for.body
+ %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+ %0 = load float, ptr %arrayidx, align 4
+ %1 = tail call float @llvm.minimum.f32(float %s.011, float %0)
+ %inc = add nuw nsw i32 %i.012, 1
+ %exitcond.not = icmp eq i32 %inc, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret float %cond9
+
+for.body: ; preds = %entry, %for.body
+ %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+ %0 = load float, ptr %arrayidx, align 4
+ %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0)
+ %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+ %1 = load float, ptr %arrayidx3, align 4
+ %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+ %inc = add nuw nsw i32 %i.025, 1
+ %exitcond.not = icmp eq i32 %inc, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum_one_with_flags
+; CHECK-LABEL: vector.body:
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret float %cond9
+
+for.body: ; preds = %entry, %for.body
+ %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+ %0 = load float, ptr %arrayidx, align 4
+ %s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0)
+ %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+ %1 = load float, ptr %arrayidx3, align 4
+ %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+ %inc = add nuw nsw i32 %i.025, 1
+ %exitcond.not = icmp eq i32 %inc, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
; Make sure any check-not directives are not triggered by function declarations.
; CHECK: declare
@@ -1099,6 +1213,8 @@ declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.minimum.f32(float, float)
+declare float @llvm.maximum.f32(float, float)
attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
attributes #1 = { "no-nans-fp-math"="true" }
More information about the llvm-commits
mailing list