[llvm] b6e102e - [SCEV] Don't use non-deterministic constant folding for trip counts (#90942)

Sun May 19 22:40:57 PDT 2024

Author: Nikita Popov
Date: 2024-05-20T07:40:54+02:00
New Revision: b6e102e08cd35543175459494211a3a15f793302

URL: https://github.com/llvm/llvm-project/commit/b6e102e08cd35543175459494211a3a15f793302
DIFF: https://github.com/llvm/llvm-project/commit/b6e102e08cd35543175459494211a3a15f793302.diff

LOG: [SCEV] Don't use non-deterministic constant folding for trip counts (#90942)

When calculating the exit count exhaustively, if any of the involved
operations is non-deterministic, the exit count we compute at
compile-time and the exit count at run-time may differ. Using these
non-deterministic constant folding results is only correct if we
actually replace all uses of the instruction with the value. SCEV (or
its consumers) generally don't do this.

Handle this by adding a new AllowNonDeterministic flag to the constant
folding API, and disabling it in SCEV. If non-deterministic results are
not allowed, do not fold FP lib calls in general, and FP operations
returning NaNs in particular. This could be made more precise (some FP
libcalls like fabs are fully deterministic), but I don't think this that
precise handling here is worthwhile.

Fixes the interesting part of
https://github.com/llvm/llvm-project/issues/89885.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ConstantFolding.h
    llvm/lib/Analysis/ConstantFolding.cpp
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index c54b1e8f01d2b..58b38fb8b0367 100644

--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -68,9 +68,16 @@ Constant *ConstantFoldConstant(const Constant *C, const DataLayout &DL,
 /// fold instructions like loads and stores, which have no constant expression
 /// form.
 ///
+/// In some cases, constant folding may return one value chosen from a set of
+/// multiple legal return values. For example, the exact bit pattern of NaN
+/// results is not guaranteed. Using such a result is usually only valid if
+/// all uses of the original operation are replaced by the constant-folded
+/// result. The \p AllowNonDeterministic parameter controls whether this is
+/// allowed.
 Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops,
                                    const DataLayout &DL,
-                                   const TargetLibraryInfo *TLI = nullptr);
+                                   const TargetLibraryInfo *TLI = nullptr,
+                                   bool AllowNonDeterministic = true);
 
 /// Attempt to constant fold a compare instruction (icmp/fcmp) with the
 /// specified operands. Returns null or a constant expression of the specified
@@ -95,7 +102,8 @@ Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
 /// Returns null or a constant expression of the specified operands on failure.
 Constant *ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                      Constant *RHS, const DataLayout &DL,
-                                     const Instruction *I);
+                                     const Instruction *I,
+                                     bool AllowNonDeterministic = true);
 
 /// Attempt to flush float point constant according to denormal mode set in the
 /// instruction's parent function attributes. If so, return a zero with the
@@ -190,7 +198,8 @@ bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
 /// with the specified arguments, returning null if unsuccessful.
 Constant *ConstantFoldCall(const CallBase *Call, Function *F,
                            ArrayRef<Constant *> Operands,
-                           const TargetLibraryInfo *TLI = nullptr);
+                           const TargetLibraryInfo *TLI = nullptr,
+                           bool AllowNonDeterministic = true);
 
 Constant *ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                       Constant *RHS, Type *Ty,

diff  --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 046a769453808..524e84f3f3ded 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -992,7 +992,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
 Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                        ArrayRef<Constant *> Ops,
                                        const DataLayout &DL,
-                                       const TargetLibraryInfo *TLI) {
+                                       const TargetLibraryInfo *TLI,
+                                       bool AllowNonDeterministic) {
   Type *DestTy = InstOrCE->getType();
 
   if (Instruction::isUnaryOp(Opcode))
@@ -1011,7 +1012,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
       // TODO: If a constant expression is being folded rather than an
       // instruction, denormals will not be flushed/treated as zero
       if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
-        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I);
+        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
+                                          AllowNonDeterministic);
       }
     }
     return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -1053,7 +1055,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
     if (auto *F = dyn_cast<Function>(Ops.back())) {
       const auto *Call = cast<CallBase>(InstOrCE);
       if (canConstantFoldCallTo(Call, F))
-        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
+        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
+                                AllowNonDeterministic);
     }
     return nullptr;
   case Instruction::Select:
@@ -1114,8 +1117,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
   }
 
   if (auto *CE = dyn_cast<ConstantExpr>(C)) {
-    if (Constant *Res =
-            ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI))
+    if (Constant *Res = ConstantFoldInstOperandsImpl(
+            CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
       return Res;
     return const_cast<Constant *>(C);
   }
@@ -1183,8 +1186,10 @@ Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
 Constant *llvm::ConstantFoldInstOperands(Instruction *I,
                                          ArrayRef<Constant *> Ops,
                                          const DataLayout &DL,
-                                         const TargetLibraryInfo *TLI) {
-  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
+                                         const TargetLibraryInfo *TLI,
+                                         bool AllowNonDeterministic) {
+  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
+                                      AllowNonDeterministic);
 }
 
 Constant *llvm::ConstantFoldCompareInstOperands(
@@ -1357,7 +1362,8 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
 
 Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                            Constant *RHS, const DataLayout &DL,
-                                           const Instruction *I) {
+                                           const Instruction *I,
+                                           bool AllowNonDeterministic) {
   if (Instruction::isBinaryOp(Opcode)) {
     // Flush denormal inputs if needed.
     Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
@@ -1367,13 +1373,30 @@ Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
     if (!Op1)
       return nullptr;
 
+    // If nsz or an algebraic FMF flag is set, the result of the FP operation
+    // may change due to future optimization. Don't constant fold them if
+    // non-deterministic results are not allowed.
+    if (!AllowNonDeterministic)
+      if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
+        if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
+            FP->hasAllowContract() || FP->hasAllowReciprocal())
+          return nullptr;
+
     // Calculate constant result.
     Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
     if (!C)
       return nullptr;
 
     // Flush denormal output if needed.
-    return FlushFPConstant(C, I, /* IsOutput */ true);
+    C = FlushFPConstant(C, I, /* IsOutput */ true);
+    if (!C)
+      return nullptr;
+
+    // The precise NaN value is non-deterministic.
+    if (!AllowNonDeterministic && C->isNaN())
+      return nullptr;
+
+    return C;
   }
   // If instruction lacks a parent/function and the denormal mode cannot be
   // determined, use the default (IEEE).
@@ -3401,7 +3424,8 @@ Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
 
 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                  ArrayRef<Constant *> Operands,
-                                 const TargetLibraryInfo *TLI) {
+                                 const TargetLibraryInfo *TLI,
+                                 bool AllowNonDeterministic) {
   if (Call->isNoBuiltin())
     return nullptr;
   if (!F->hasName())
@@ -3417,8 +3441,13 @@ Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
       return nullptr;
   }
 
-  StringRef Name = F->getName();
+  // Conservatively assume that floating-point libcalls may be
+  // non-deterministic.
   Type *Ty = F->getReturnType();
+  if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
+    return nullptr;
+
+  StringRef Name = F->getName();
   if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
     return ConstantFoldFixedVectorCall(
         Name, IID, FVTy, Operands, F->getParent()->getDataLayout(), TLI, Call);

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 254d79183a1e9..704f92669a117 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9540,7 +9540,8 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
     Operands[i] = C;
   }
 
-  return ConstantFoldInstOperands(I, Operands, DL, TLI);
+  return ConstantFoldInstOperands(I, Operands, DL, TLI,
+                                  /*AllowNonDeterministic=*/false);
 }
 
 
@@ -10031,7 +10032,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
 
     Constant *C = nullptr;
     const DataLayout &DL = getDataLayout();
-    C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
+    C = ConstantFoldInstOperands(I, Operands, DL, &TLI,
+                                 /*AllowNonDeterministic=*/false);
     if (!C)
       return V;
     return getSCEV(C);

diff  --git a/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll b/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll
index 21237f4266933..cc08fa5fc7d87 100644
--- a/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll
@@ -27,4 +27,156 @@ for.cond.cleanup:
   ret void
 }
 
+; Do not compute exhaustive trip count based on FP libcalls, as their exact
+; return value may not be specified.
+define i64 @test_fp_libcall() {
+; CHECK-LABEL: 'test_fp_libcall'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_libcall
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = call double @llvm.sin.f64(double %fv)
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 0x3FC6BA15EE8460B0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+; Do not compute exhaustive trip count based on FP constant folding resulting
+; in NaN values, as we don't specify which NaN exactly is returned.
+define i64 @test_nan_sign() {
+; CHECK-LABEL: 'test_nan_sign'
+; CHECK-NEXT:  Determining loop execution counts for: @test_nan_sign
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ -1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %a = fsub double %fv, 0x7F86C16C16C16C16
+  %b = fadd double %a, %a
+  %fv.next = fsub double %b, %a
+  %iv.next = add i64 %iv, 1
+  %fv.bc = bitcast double %fv to i64
+  %icmp = icmp slt i64 %fv.bc, 0
+  br i1 %icmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+; Do not compute exhaustive trip count based on FP constant folding if the
+; involved operation has nsz or one of the algebraic FMF flags (reassoc, arcp,
+; contract) set. The examples in the following are dummies and don't illustrate
+; real cases where FMF transforms could cause issues.
+
+define i64 @test_fp_nsz() {
+; CHECK-LABEL: 'test_fp_nsz'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_nsz
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd nsz double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+define i64 @test_fp_reassoc() {
+; CHECK-LABEL: 'test_fp_reassoc'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_reassoc
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd reassoc double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+define i64 @test_fp_arcp() {
+; CHECK-LABEL: 'test_fp_arcp'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_arcp
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd arcp double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+define i64 @test_fp_contract() {
+; CHECK-LABEL: 'test_fp_contract'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_contract
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd contract double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
 declare void @dummy()
+declare void @use(double %i)
+declare double @llvm.sin.f64(double)