[llvm] d9c52c3 - [LV][IVDescriptors] Fix recurrence identity element for FMin and FMax reductions

Fri Nov 4 07:39:47 PDT 2022

Author: Karthik Senthil
Date: 2022-11-04T10:39:37-04:00
New Revision: d9c52c31a0fee22414bb4024ee5b203fe86ef5bb

URL: https://github.com/llvm/llvm-project/commit/d9c52c31a0fee22414bb4024ee5b203fe86ef5bb
DIFF: https://github.com/llvm/llvm-project/commit/d9c52c31a0fee22414bb4024ee5b203fe86ef5bb.diff

LOG: [LV][IVDescriptors] Fix recurrence identity element for FMin and FMax reductions

For a min and max reduction idioms, the identity (i.e. neutral) element
should be datatype's highest and lowest possible values respectively.
Current implementation in IVDescriptors incorrectly returns -Inf for FMin
reduction and +Inf for FMax reduction. This patch fixes this bug which
was causing incorrect reduction computation results in loops vectorized
by LV.

Differential Revision: https://reviews.llvm.org/D137220

Added: 
    

Modified: 
    llvm/lib/Analysis/IVDescriptors.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
    llvm/unittests/Analysis/IVDescriptorsTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index e42512b41aa66..c76155832bce3 100644

--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1111,9 +1111,13 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
     return ConstantInt::get(Tp,
                             APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
   case RecurKind::FMin:
-    return ConstantFP::getInfinity(Tp, true);
+    assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
+           "nnan, nsz is expected to be set for FP min reduction.");
+    return ConstantFP::getInfinity(Tp, false /*Negative*/);
   case RecurKind::FMax:
-    return ConstantFP::getInfinity(Tp, false);
+    assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
+           "nnan, nsz is expected to be set for FP max reduction.");
+    return ConstantFP::getInfinity(Tp, true /*Negative*/);
   case RecurKind::SelectICmp:
   case RecurKind::SelectFCmp:
     return getRecurrenceStartValue();

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
index 091b117c182ac..9ae930514e613 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
@@ -117,7 +117,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, float* [[TMP9]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <vscale x 4 x float>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0xFFF0000000000000, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x7FF0000000000000, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]])
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]]

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index 8be98ebf087d5..ff3a071fc507a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -168,7 +168,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
+; CHECK-NEXT:    [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
 ; CHECK-NEXT:    [[TMP27:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP26]])
 ; CHECK-NEXT:    [[TMP28]] = call fast float @llvm.minnum.f32(float [[TMP27]], float [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

diff  --git a/llvm/unittests/Analysis/IVDescriptorsTest.cpp b/llvm/unittests/Analysis/IVDescriptorsTest.cpp
index e7948db10ae66..fd9a5a801042c 100644
--- a/llvm/unittests/Analysis/IVDescriptorsTest.cpp
+++ b/llvm/unittests/Analysis/IVDescriptorsTest.cpp
@@ -203,3 +203,107 @@ TEST(IVDescriptorsTest, LoopWithPtrToInt) {
         EXPECT_TRUE(IsInductionPHI);
       });
 }
+
+// This tests that correct identity value is returned for a RecurrenceDescriptor
+// that describes FMin reduction idiom.
+TEST(IVDescriptorsTest, FMinRednIdentity) {
+  // Parse the module.
+  LLVMContext Context;
+
+  std::unique_ptr<Module> M = parseIR(Context,
+                                      R"(define float @foo(float* %A, i64 %ub) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %fmin = phi float [ 1.000000e+00, %entry ], [ %fmin.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %i
+  %ld = load float, float* %arrayidx
+  %fmin.cmp = fcmp nnan nsz olt float %fmin, %ld
+  %fmin.next = select nnan nsz i1 %fmin.cmp, float %fmin, float %ld
+  %i.next = add nsw i64 %i, 1
+  %cmp = icmp slt i64 %i.next, %ub
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  %fmin.lcssa = phi float [ %fmin.next, %for.body ]
+  ret float %fmin.lcssa
+})");
+
+  runWithLoopInfoAndSE(
+      *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+        Function::iterator FI = F.begin();
+        // First basic block is entry - skip it.
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+        BasicBlock::iterator BBI = Header->begin();
+        assert((&*BBI)->getName() == "i");
+        ++BBI;
+        PHINode *Phi = dyn_cast<PHINode>(&*BBI);
+        assert(Phi->getName() == "fmin");
+        RecurrenceDescriptor Rdx;
+        bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx);
+        EXPECT_TRUE(IsRdxPhi);
+        RecurKind Kind = Rdx.getRecurrenceKind();
+        EXPECT_EQ(Kind, RecurKind::FMin);
+        Type *Ty = Phi->getType();
+        Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags());
+        // Identity value for FP min reduction is +Inf.
+        EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, false /*Negative*/));
+      });
+}
+
+// This tests that correct identity value is returned for a RecurrenceDescriptor
+// that describes FMax reduction idiom.
+TEST(IVDescriptorsTest, FMaxRednIdentity) {
+  // Parse the module.
+  LLVMContext Context;
+
+  std::unique_ptr<Module> M = parseIR(Context,
+                                      R"(define float @foo(float* %A, i64 %ub) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %fmax = phi float [ 1.000000e+00, %entry ], [ %fmax.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %i
+  %ld = load float, float* %arrayidx
+  %fmax.cmp = fcmp nnan nsz ogt float %fmax, %ld
+  %fmax.next = select nnan nsz i1 %fmax.cmp, float %fmax, float %ld
+  %i.next = add nsw i64 %i, 1
+  %cmp = icmp slt i64 %i.next, %ub
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  %fmax.lcssa = phi float [ %fmax.next, %for.body ]
+  ret float %fmax.lcssa
+})");
+
+  runWithLoopInfoAndSE(
+      *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+        Function::iterator FI = F.begin();
+        // First basic block is entry - skip it.
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+        BasicBlock::iterator BBI = Header->begin();
+        assert((&*BBI)->getName() == "i");
+        ++BBI;
+        PHINode *Phi = dyn_cast<PHINode>(&*BBI);
+        assert(Phi->getName() == "fmax");
+        RecurrenceDescriptor Rdx;
+        bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx);
+        EXPECT_TRUE(IsRdxPhi);
+        RecurKind Kind = Rdx.getRecurrenceKind();
+        EXPECT_EQ(Kind, RecurKind::FMax);
+        Type *Ty = Phi->getType();
+        Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags());
+        // Identity value for FP max reduction is -Inf.
+        EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, true /*Negative*/));
+      });
+}