[llvm] d9c52c3 - [LV][IVDescriptors] Fix recurrence identity element for FMin and FMax reductions
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 4 07:39:47 PDT 2022
Author: Karthik Senthil
Date: 2022-11-04T10:39:37-04:00
New Revision: d9c52c31a0fee22414bb4024ee5b203fe86ef5bb
URL: https://github.com/llvm/llvm-project/commit/d9c52c31a0fee22414bb4024ee5b203fe86ef5bb
DIFF: https://github.com/llvm/llvm-project/commit/d9c52c31a0fee22414bb4024ee5b203fe86ef5bb.diff
LOG: [LV][IVDescriptors] Fix recurrence identity element for FMin and FMax reductions
For min and max reduction idioms, the identity (i.e. neutral) element
should be the datatype's highest and lowest possible values, respectively.
The current implementation in IVDescriptors incorrectly returns -Inf for FMin
reductions and +Inf for FMax reductions. This patch fixes this bug, which
was causing incorrect reduction computation results in loops vectorized
by LV.
Differential Revision: https://reviews.llvm.org/D137220
Added:
Modified:
llvm/lib/Analysis/IVDescriptors.cpp
llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
llvm/unittests/Analysis/IVDescriptorsTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index e42512b41aa66..c76155832bce3 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1111,9 +1111,13 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
return ConstantInt::get(Tp,
APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
case RecurKind::FMin:
- return ConstantFP::getInfinity(Tp, true);
+ assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
+ "nnan, nsz is expected to be set for FP min reduction.");
+ return ConstantFP::getInfinity(Tp, false /*Negative*/);
case RecurKind::FMax:
- return ConstantFP::getInfinity(Tp, false);
+ assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
+ "nnan, nsz is expected to be set for FP max reduction.");
+ return ConstantFP::getInfinity(Tp, true /*Negative*/);
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
return getRecurrenceStartValue();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
index 091b117c182ac..9ae930514e613 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
@@ -117,7 +117,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <vscale x 4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> poison)
-; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0xFFF0000000000000, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x7FF0000000000000, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]])
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index 8be98ebf087d5..ff3a071fc507a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -168,7 +168,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
+; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
; CHECK-NEXT: [[TMP27:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP26]])
; CHECK-NEXT: [[TMP28]] = call fast float @llvm.minnum.f32(float [[TMP27]], float [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/unittests/Analysis/IVDescriptorsTest.cpp b/llvm/unittests/Analysis/IVDescriptorsTest.cpp
index e7948db10ae66..fd9a5a801042c 100644
--- a/llvm/unittests/Analysis/IVDescriptorsTest.cpp
+++ b/llvm/unittests/Analysis/IVDescriptorsTest.cpp
@@ -203,3 +203,107 @@ TEST(IVDescriptorsTest, LoopWithPtrToInt) {
EXPECT_TRUE(IsInductionPHI);
});
}
+
+// This tests that correct identity value is returned for a RecurrenceDescriptor
+// that describes FMin reduction idiom.
+TEST(IVDescriptorsTest, FMinRednIdentity) {
+ // Parse the module.
+ LLVMContext Context;
+
+ std::unique_ptr<Module> M = parseIR(Context,
+ R"(define float @foo(float* %A, i64 %ub) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %fmin = phi float [ 1.000000e+00, %entry ], [ %fmin.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %i
+ %ld = load float, float* %arrayidx
+ %fmin.cmp = fcmp nnan nsz olt float %fmin, %ld
+ %fmin.next = select nnan nsz i1 %fmin.cmp, float %fmin, float %ld
+ %i.next = add nsw i64 %i, 1
+ %cmp = icmp slt i64 %i.next, %ub
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ %fmin.lcssa = phi float [ %fmin.next, %for.body ]
+ ret float %fmin.lcssa
+})");
+
+ runWithLoopInfoAndSE(
+ *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+ Function::iterator FI = F.begin();
+ // First basic block is entry - skip it.
+ BasicBlock *Header = &*(++FI);
+ assert(Header->getName() == "for.body");
+ Loop *L = LI.getLoopFor(Header);
+ EXPECT_NE(L, nullptr);
+ BasicBlock::iterator BBI = Header->begin();
+ assert((&*BBI)->getName() == "i");
+ ++BBI;
+ PHINode *Phi = dyn_cast<PHINode>(&*BBI);
+ assert(Phi->getName() == "fmin");
+ RecurrenceDescriptor Rdx;
+ bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx);
+ EXPECT_TRUE(IsRdxPhi);
+ RecurKind Kind = Rdx.getRecurrenceKind();
+ EXPECT_EQ(Kind, RecurKind::FMin);
+ Type *Ty = Phi->getType();
+ Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags());
+ // Identity value for FP min reduction is +Inf.
+ EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, false /*Negative*/));
+ });
+}
+
+// This tests that correct identity value is returned for a RecurrenceDescriptor
+// that describes FMax reduction idiom.
+TEST(IVDescriptorsTest, FMaxRednIdentity) {
+ // Parse the module.
+ LLVMContext Context;
+
+ std::unique_ptr<Module> M = parseIR(Context,
+ R"(define float @foo(float* %A, i64 %ub) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %fmax = phi float [ 1.000000e+00, %entry ], [ %fmax.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %i
+ %ld = load float, float* %arrayidx
+ %fmax.cmp = fcmp nnan nsz ogt float %fmax, %ld
+ %fmax.next = select nnan nsz i1 %fmax.cmp, float %fmax, float %ld
+ %i.next = add nsw i64 %i, 1
+ %cmp = icmp slt i64 %i.next, %ub
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ %fmax.lcssa = phi float [ %fmax.next, %for.body ]
+ ret float %fmax.lcssa
+})");
+
+ runWithLoopInfoAndSE(
+ *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+ Function::iterator FI = F.begin();
+ // First basic block is entry - skip it.
+ BasicBlock *Header = &*(++FI);
+ assert(Header->getName() == "for.body");
+ Loop *L = LI.getLoopFor(Header);
+ EXPECT_NE(L, nullptr);
+ BasicBlock::iterator BBI = Header->begin();
+ assert((&*BBI)->getName() == "i");
+ ++BBI;
+ PHINode *Phi = dyn_cast<PHINode>(&*BBI);
+ assert(Phi->getName() == "fmax");
+ RecurrenceDescriptor Rdx;
+ bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx);
+ EXPECT_TRUE(IsRdxPhi);
+ RecurKind Kind = Rdx.getRecurrenceKind();
+ EXPECT_EQ(Kind, RecurKind::FMax);
+ Type *Ty = Phi->getType();
+ Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags());
+ // Identity value for FP max reduction is -Inf.
+ EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, true /*Negative*/));
+ });
+}
More information about the llvm-commits
mailing list