[llvm] [IVDescriptors] Support reductions with minimumnum/maximumnum. (PR #137335)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 25 07:13:45 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Add a new reduction recurrence kind for reductions with minimumnum/maximumnum. Such reductions can be vectorized without nsz/nnans, same as reductions with maximum/minimum intrinsics.
Note that a new reduction kind is needed to make sure partial reductions are also combined with minimumnum/maximumnum.
Note that the final reduction to a scalar value is performed with vector.reduce.fmin/fmax. This should be fine, as the results of the partial reductions with maximumnum/minimumnum silences any sNaNs.
In-loop and reductions in SLP are not supported yet, as there's no reduction version of maximumnum/minimumnum yet and fmax may be incorrect.
---
Patch is 26.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137335.diff
6 Files Affected:
- (modified) llvm/include/llvm/Analysis/IVDescriptors.h (+4-1)
- (modified) llvm/lib/Analysis/IVDescriptors.cpp (+26-3)
- (modified) llvm/lib/Transforms/Utils/LoopUtils.cpp (+12-1)
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+9-1)
- (modified) llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll (+136-28)
- (added) llvm/test/Transforms/SLPVectorizer/AArch64/reduce-maximumnum-minimumnum.ll (+62)
``````````diff
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 42bccdc028461..140edff13a67f 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
FMax, ///< FP max implemented in terms of select(cmp()).
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
+ FMinimumNum, ///< FP min with llvm.minimumnum semantics
+ FMaximumNum, ///< FP max with llvm.maximumnum semantics
FMulAdd, ///< Sum of float products with llvm.fmuladd(a * b + sum).
IAnyOf, ///< Any_of reduction with select(icmp(),x,y) where one of (x,y) is
///< loop invariant, and both x and y are integer type.
@@ -239,7 +241,8 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
- Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
+ Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
+ Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
}
/// Returns true if the recurrence kind is any min/max kind.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 94c347b01bbfb..e7e0bef048f71 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -788,6 +788,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
+ if (match(I, m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMinimumNum, I);
+ if (match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMaximumNum, I);
if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMinimum, I);
if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
@@ -892,10 +896,14 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
return true;
if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
return true;
- // minimum and maximum intrinsics do not require nsz and nnan flags since
- // NaN and signed zeroes are propagated in the intrinsic implementation.
+ // minimum/minnum and maximum/maxnum intrinsics do not require nsz and nnan
+ // flags since NaN and signed zeroes are propagated in the intrinsic
+ // implementation.
return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
- match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
+ match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())) ||
+ match(I,
+ m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
+ match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
if (isIntMinMaxRecurrenceKind(Kind) ||
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
@@ -1035,6 +1043,19 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::FMaximumNum, TheLoop, FMF, RedDes, DB, AC,
+ DT, SE)) {
+ LLVM_DEBUG(dbgs() << "Found a float MAXIMUMNUM reduction PHI." << *Phi
+ << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::FMinimumNum, TheLoop, FMF, RedDes, DB, AC,
+ DT, SE)) {
+ LLVM_DEBUG(dbgs() << "Found a float MINIMUMNUM reduction PHI." << *Phi
+ << "\n");
+ return true;
+ }
+
// Not a reduction of known type.
return false;
}
@@ -1155,6 +1176,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
+ case RecurKind::FMaximumNum:
+ case RecurKind::FMinimumNum:
case RecurKind::FAnyOf:
case RecurKind::FFindLastIV:
return Instruction::FCmp;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index f57d95e7722dc..2fff9521017ff 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -958,6 +958,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
return Intrinsic::vector_reduce_fmaximum;
case RecurKind::FMinimum:
return Intrinsic::vector_reduce_fminimum;
+ case RecurKind::FMaximumNum:
+ return Intrinsic::vector_reduce_fmax;
+ case RecurKind::FMinimumNum:
+ return Intrinsic::vector_reduce_fmin;
}
}
@@ -1053,6 +1057,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
return Intrinsic::minimum;
case RecurKind::FMaximum:
return Intrinsic::maximum;
+ case RecurKind::FMinimumNum:
+ return Intrinsic::minimumnum;
+ case RecurKind::FMaximumNum:
+ return Intrinsic::maximumnum;
}
}
@@ -1101,7 +1109,8 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
if (Ty->isIntOrIntVectorTy() ||
- (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
+ (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
+ RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
// TODO: Add float minnum/maxnum support when FMF nnan is set.
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
@@ -1320,6 +1329,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
case RecurKind::FMin:
case RecurKind::FMinimum:
case RecurKind::FMaximum:
+ case RecurKind::FMinimumNum:
+ case RecurKind::FMaximumNum:
return Builder.CreateUnaryIntrinsic(getReductionIntrinsicID(RdxKind), Src);
case RecurKind::FMulAdd:
case RecurKind::FAdd:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 53da78ee599b7..56a3bf74814b5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21774,7 +21774,9 @@ class HorizontalReduction {
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMaximum:
- case RecurKind::FMinimum: {
+ case RecurKind::FMinimum:
+ case RecurKind::FMaximumNum:
+ case RecurKind::FMinimumNum: {
Intrinsic::ID Id = llvm::getMinMaxReductionIntrinsicOp(Kind);
return Builder.CreateBinaryIntrinsic(Id, LHS, RHS);
}
@@ -23086,6 +23088,8 @@ class HorizontalReduction {
case RecurKind::FAnyOf:
case RecurKind::IFindLastIV:
case RecurKind::FFindLastIV:
+ case RecurKind::FMaximumNum:
+ case RecurKind::FMinimumNum:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for repeated scalar.");
}
@@ -23220,6 +23224,8 @@ class HorizontalReduction {
case RecurKind::FAnyOf:
case RecurKind::IFindLastIV:
case RecurKind::FFindLastIV:
+ case RecurKind::FMaximumNum:
+ case RecurKind::FMinimumNum:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for repeated scalar.");
}
@@ -23319,6 +23325,8 @@ class HorizontalReduction {
case RecurKind::FAnyOf:
case RecurKind::IFindLastIV:
case RecurKind::FFindLastIV:
+ case RecurKind::FMaximumNum:
+ case RecurKind::FMinimumNum:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for reused scalars.");
}
diff --git a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll
index eb6dcc72df57e..6dde2b9adc7c8 100644
--- a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll
@@ -6,18 +6,42 @@ define float @maximumnum_intrinsic(ptr readonly %x) {
; CHECK-LABEL: define float @maximumnum_intrinsic(
; CHECK-SAME: ptr readonly [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI1]], <2 x float> [[WIDE_LOAD2]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[IV]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> [[RDX_MINMAX]])
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
-; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT: [[RED_NEXT]] = tail call float @llvm.maximumnum.f32(float [[RED]], float [[L]])
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]]
;
entry:
@@ -41,18 +65,42 @@ define float @maximumnum_intrinsic_fast(ptr readonly %x) {
; CHECK-LABEL: define float @maximumnum_intrinsic_fast(
; CHECK-SAME: ptr readonly [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI1]], <2 x float> [[WIDE_LOAD2]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[IV]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> [[RDX_MINMAX]])
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
-; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT: [[RED_NEXT]] = tail call fast float @llvm.maximumnum.f32(float [[RED]], float [[L]])
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]]
;
entry:
@@ -76,18 +124,42 @@ define float @minimumnum_intrinsic(ptr readonly %x) {
; CHECK-LABEL: define float @minimumnum_intrinsic(
; CHECK-SAME: ptr readonly [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI1]], <2 x float> [[WIDE_LOAD2]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[IV]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> [[RDX_MINMAX]])
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
-; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT: [[RED_NEXT]] = tail call float @llvm.minimumnum.f32(float [[RED]], float [[L]])
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]]
;
entry:
@@ -111,18 +183,42 @@ define float @minimumnum_intrinsic_fast(ptr readonly %x) {
; CHECK-LABEL: define float @minimumnum_intrinsic_fast(
; CHECK-SAME: ptr readonly [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = l...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/137335
More information about the llvm-commits
mailing list