[clang] Reapply "[analyzer] Handle [[assume(cond)]] as __builtin_assume(cond)" (PR #129234)

via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 28 04:10:46 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Balazs Benics (steakhal)

<details>
<summary>Changes</summary>

This is the second attempt to bring initial support for [[assume()]] in the Clang Static Analyzer.
The first attempt (#<!-- -->116462) was reverted in 2b9abf0db2d106c7208b4372e662ef5df869e6f1 due to some weird failure in a libcxx test involving `#pragma clang loop vectorize(enable) interleave(enable)`.

The failure could be reduced into:
```c++
template <class ExecutionPolicy>
void transform(ExecutionPolicy) {
  #pragma clang loop vectorize(enable) interleave(enable)
  for (int i = 0; 0;) { // The DeclStmt of "i" would be added twice in the ThreadSafety analysis.
    // empty
  }
}
void entrypoint() {
  transform(1);
}
```

As it turns out, the problem with the initial patch was this:
```c++
for (const auto *Attr : AS->getAttrs()) {
  if (const auto *AssumeAttr = dyn_cast<CXXAssumeAttr>(Attr)) {
    Expr *AssumeExpr = AssumeAttr->getAssumption();
    if (!AssumeExpr->HasSideEffects(Ctx)) {
      childrenBuf.push_back(AssumeExpr);
    }
  }
  // Visit the actual children AST nodes.
  // For CXXAssumeAttrs, this is always a NullStmt.
  llvm::append_range(childrenBuf, AS->children()); // <--- This was not meant to be part of the "for" loop.
  children = childrenBuf;
}
return;
 ```

The solution was simple. Just hoist it from the loop.

I also had a closer look at `CFGBuilder::VisitAttributedStmt`, where I also spotted another bug.
We would have added the CFG blocks twice if the AttributedStmt would have both the `[[fallthrough]]` and the `[[assume()]]` attributes. With my fix, it will only once add the blocks. Added a regression test for this.

Co-authored-by: Vinay Deshmukh <vinay_deshmukh AT outlook DOT com>

---
Full diff: https://github.com/llvm/llvm-project/pull/129234.diff


8 Files Affected:

- (modified) clang/include/clang/AST/AttrIterator.h (+12) 
- (modified) clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h (+4) 
- (modified) clang/lib/Analysis/CFG.cpp (+48-16) 
- (modified) clang/lib/StaticAnalyzer/Core/ExprEngine.cpp (+7-1) 
- (modified) clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp (+4-3) 
- (modified) clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp (+18) 
- (added) clang/test/Analysis/cxx23-assume-attribute.cpp (+77) 
- (modified) clang/test/Analysis/out-of-bounds-new.cpp (+59-1) 


``````````diff
diff --git a/clang/include/clang/AST/AttrIterator.h b/clang/include/clang/AST/AttrIterator.h
index 7e2bb0381d4c8..2f39c144dc160 100644
--- a/clang/include/clang/AST/AttrIterator.h
+++ b/clang/include/clang/AST/AttrIterator.h
@@ -16,6 +16,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ADL.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
 #include <cassert>
 #include <cstddef>
@@ -124,6 +125,17 @@ inline auto *getSpecificAttr(const Container &container) {
   return It != specific_attr_end<IterTy>(container) ? *It : nullptr;
 }
 
+template <typename SpecificAttr, typename Container>
+inline auto getSpecificAttrs(const Container &container) {
+  using ValueTy = llvm::detail::ValueOfRange<Container>;
+  using ValuePointeeTy = std::remove_pointer_t<ValueTy>;
+  using IterTy = std::conditional_t<std::is_const_v<ValuePointeeTy>,
+                                    const SpecificAttr, SpecificAttr>;
+  auto Begin = specific_attr_begin<IterTy>(container);
+  auto End = specific_attr_end<IterTy>(container);
+  return llvm::make_range(Begin, End);
+}
+
 } // namespace clang
 
 #endif // LLVM_CLANG_AST_ATTRITERATOR_H
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index 9fd07ce47175c..804fc74b009df 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -498,6 +498,10 @@ class ExprEngine {
   void VisitInitListExpr(const InitListExpr *E, ExplodedNode *Pred,
                          ExplodedNodeSet &Dst);
 
+  /// VisitAttributedStmt - Transfer function logic for AttributedStmt
+  void VisitAttributedStmt(const AttributedStmt *A, ExplodedNode *Pred,
+                           ExplodedNodeSet &Dst);
+
   /// VisitLogicalExpr - Transfer function logic for '&&', '||'
   void VisitLogicalExpr(const BinaryOperator* B, ExplodedNode *Pred,
                         ExplodedNodeSet &Dst);
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 3e144395cffc6..3f1e0c2b233a6 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -433,7 +433,7 @@ class reverse_children {
   ArrayRef<Stmt *> children;
 
 public:
-  reverse_children(Stmt *S);
+  reverse_children(Stmt *S, ASTContext &Ctx);
 
   using iterator = ArrayRef<Stmt *>::reverse_iterator;
 
@@ -443,21 +443,44 @@ class reverse_children {
 
 } // namespace
 
-reverse_children::reverse_children(Stmt *S) {
-  if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
-    children = CE->getRawSubExprs();
+reverse_children::reverse_children(Stmt *S, ASTContext &Ctx) {
+  switch (S->getStmtClass()) {
+  case Stmt::CallExprClass: {
+    children = cast<CallExpr>(S)->getRawSubExprs();
     return;
   }
-  switch (S->getStmtClass()) {
-    // Note: Fill in this switch with more cases we want to optimize.
-    case Stmt::InitListExprClass: {
-      InitListExpr *IE = cast<InitListExpr>(S);
-      children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
-                                IE->getNumInits());
-      return;
+
+  // Note: Fill in this switch with more cases we want to optimize.
+  case Stmt::InitListExprClass: {
+    InitListExpr *IE = cast<InitListExpr>(S);
+    children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
+                              IE->getNumInits());
+    return;
+  }
+
+  case Stmt::AttributedStmtClass: {
+    // for an attributed stmt, the "children()" returns only the NullStmt
+    // (;) but semantically the "children" are supposed to be the
+    // expressions _within_ i.e. the two square brackets i.e. [[ HERE ]]
+    // so we add the subexpressions first, _then_ add the "children"
+    auto *AS = cast<AttributedStmt>(S);
+    for (const auto *Attr : AS->getAttrs()) {
+      if (const auto *AssumeAttr = dyn_cast<CXXAssumeAttr>(Attr)) {
+        Expr *AssumeExpr = AssumeAttr->getAssumption();
+        if (!AssumeExpr->HasSideEffects(Ctx)) {
+          childrenBuf.push_back(AssumeExpr);
+        }
+      }
     }
-    default:
-      break;
+
+    // Visit the actual children AST nodes.
+    // For CXXAssumeAttrs, this is always a NullStmt.
+    llvm::append_range(childrenBuf, AS->children());
+    children = childrenBuf;
+    return;
+  }
+  default:
+    break;
   }
 
   // Default case for all other statements.
@@ -2433,7 +2456,7 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) {
 
   // Visit the children in their reverse order so that they appear in
   // left-to-right (natural) order in the CFG.
-  reverse_children RChildren(S);
+  reverse_children RChildren(S, *Context);
   for (Stmt *Child : RChildren) {
     if (Child)
       if (CFGBlock *R = Visit(Child))
@@ -2449,7 +2472,7 @@ CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) {
   }
   CFGBlock *B = Block;
 
-  reverse_children RChildren(ILE);
+  reverse_children RChildren(ILE, *Context);
   for (Stmt *Child : RChildren) {
     if (!Child)
       continue;
@@ -2484,6 +2507,14 @@ static bool isFallthroughStatement(const AttributedStmt *A) {
   return isFallthrough;
 }
 
+static bool isCXXAssumeAttr(const AttributedStmt *A) {
+  bool hasAssumeAttr = hasSpecificAttr<CXXAssumeAttr>(A->getAttrs());
+
+  assert((!hasAssumeAttr || isa<NullStmt>(A->getSubStmt())) &&
+         "expected [[assume]] not to have children");
+  return hasAssumeAttr;
+}
+
 CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
                                           AddStmtChoice asc) {
   // AttributedStmts for [[likely]] can have arbitrary statements as children,
@@ -2494,7 +2525,8 @@ CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
   // So only add the AttributedStmt for FallThrough, which has CFG effects and
   // also no children, and omit the others. None of the other current StmtAttrs
   // have semantic meaning for the CFG.
-  if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) {
+  bool isInterestingAttribute = isFallthroughStatement(A) || isCXXAssumeAttr(A);
+  if (isInterestingAttribute && asc.alwaysAdd(*this, A)) {
     autoCreateBlock();
     appendStmt(Block, A);
   }
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 318fa3c1caf06..8036f03e289d4 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1950,7 +1950,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     // to be explicitly evaluated.
     case Stmt::PredefinedExprClass:
     case Stmt::AddrLabelExprClass:
-    case Stmt::AttributedStmtClass:
     case Stmt::IntegerLiteralClass:
     case Stmt::FixedPointLiteralClass:
     case Stmt::CharacterLiteralClass:
@@ -1981,6 +1980,13 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       break;
     }
 
+    case Stmt::AttributedStmtClass: {
+      Bldr.takeNodes(Pred);
+      VisitAttributedStmt(cast<AttributedStmt>(S), Pred, Dst);
+      Bldr.addNodes(Dst);
+      break;
+    }
+
     case Stmt::CXXDefaultArgExprClass:
     case Stmt::CXXDefaultInitExprClass: {
       Bldr.takeNodes(Pred);
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 1061dafbb2473..50176f8fdbc3d 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -796,9 +796,10 @@ void ExprEngine::VisitGuardedExpr(const Expr *Ex,
 
   // Find the predecessor block.
   ProgramStateRef SrcState = state;
+
   for (const ExplodedNode *N = Pred ; N ; N = *N->pred_begin()) {
-    ProgramPoint PP = N->getLocation();
-    if (PP.getAs<PreStmtPurgeDeadSymbols>() || PP.getAs<BlockEntrance>()) {
+    auto Edge = N->getLocationAs<BlockEdge>();
+    if (!Edge.has_value()) {
       // If the state N has multiple predecessors P, it means that successors
       // of P are all equivalent.
       // In turn, that means that all nodes at P are equivalent in terms
@@ -806,7 +807,7 @@ void ExprEngine::VisitGuardedExpr(const Expr *Ex,
       // FIXME: a more robust solution which does not walk up the tree.
       continue;
     }
-    SrcBlock = PP.castAs<BlockEdge>().getSrc();
+    SrcBlock = Edge->getSrc();
     SrcState = N->getState();
     break;
   }
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index f7020da2e6da2..5f9dbcb55e811 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/AttrIterator.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/ParentMap.h"
 #include "clang/AST/StmtCXX.h"
@@ -1200,3 +1201,20 @@ void ExprEngine::VisitLambdaExpr(const LambdaExpr *LE, ExplodedNode *Pred,
   // FIXME: Move all post/pre visits to ::Visit().
   getCheckerManager().runCheckersForPostStmt(Dst, Tmp, LE, *this);
 }
+
+void ExprEngine::VisitAttributedStmt(const AttributedStmt *A,
+                                     ExplodedNode *Pred, ExplodedNodeSet &Dst) {
+  ExplodedNodeSet CheckerPreStmt;
+  getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this);
+
+  ExplodedNodeSet EvalSet;
+  StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
+
+  for (const auto *Attr : getSpecificAttrs<CXXAssumeAttr>(A->getAttrs())) {
+    for (ExplodedNode *N : CheckerPreStmt) {
+      Visit(Attr->getAssumption(), N, EvalSet);
+    }
+  }
+
+  getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
+}
diff --git a/clang/test/Analysis/cxx23-assume-attribute.cpp b/clang/test/Analysis/cxx23-assume-attribute.cpp
new file mode 100644
index 0000000000000..ee049af9f13aa
--- /dev/null
+++ b/clang/test/Analysis/cxx23-assume-attribute.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_analyze_cc1 -std=c++23 -triple x86_64-pc-linux-gnu \
+// RUN:   -analyzer-checker=core,debug.ExprInspection -verify %s
+
+template <typename T> void clang_analyzer_dump(T);
+template <typename T> void clang_analyzer_value(T);
+
+int ternary_in_builtin_assume(int a, int b) {
+  __builtin_assume(a > 10 ? b == 4 : b == 10);
+
+  clang_analyzer_value(a);
+  // expected-warning at -1 {{[-2147483648, 10]}}
+  // expected-warning at -2 {{[11, 2147483647]}}
+
+  clang_analyzer_dump(b); // expected-warning{{4}} expected-warning{{10}}
+
+  if (a > 20) {
+    clang_analyzer_dump(b + 100); // expected-warning {{104}}
+    return 2;
+  }
+  if (a > 10) {
+    clang_analyzer_dump(b + 200); // expected-warning {{204}}
+    return 1;
+  }
+  clang_analyzer_dump(b + 300); // expected-warning {{310}}
+  return 0;
+}
+
+// From: https://github.com/llvm/llvm-project/pull/116462#issuecomment-2517853226
+int ternary_in_assume(int a, int b) {
+  // FIXME notes
+  // Currently, if this test is run without the core.builtin.Builtin checker, the above function with the __builtin_assume behaves identically to the following test
+  // i.e. calls to `clang_analyzer_dump` result in "extraneous"  prints of the SVal(s) `reg<int b> ...`
+  // as opposed to 4 or 10
+  // which likely implies the Program State(s) did not get narrowed.
+  // A new checker is likely needed to be implemented to properly handle the expressions within `[[assume]]` to eliminate the states where `b` is not narrowed.
+
+  [[assume(a > 10 ? b == 4 : b == 10)]];
+  clang_analyzer_value(a);
+  // expected-warning at -1 {{[-2147483648, 10]}}
+  // expected-warning at -2 {{[11, 2147483647]}}
+
+  clang_analyzer_dump(b); // expected-warning {{4}} expected-warning {{10}}
+  // expected-warning-re at -1 {{reg_${{[0-9]+}}<int b>}} FIXME: We shouldn't have this dump.
+
+  if (a > 20) {
+    clang_analyzer_dump(b + 100); // expected-warning {{104}}
+    // expected-warning-re at -1 {{(reg_${{[0-9]+}}<int b>) + 100}} FIXME: We shouldn't have this dump.
+    return 2;
+  }
+  if (a > 10) {
+    clang_analyzer_dump(b + 200); // expected-warning {{204}}
+    // expected-warning-re at -1 {{(reg_${{[0-9]+}}<int b>) + 200}} FIXME: We shouldn't have this dump.
+    return 1;
+  }
+  clang_analyzer_dump(b + 300); // expected-warning {{310}}
+  // expected-warning-re at -1 {{(reg_${{[0-9]+}}<int b>) + 300}} FIXME: We shouldn't have this dump.
+  return 0;
+}
+
+int assume_and_fallthrough_at_the_same_attrstmt(int a, int b) {
+  [[assume(a == 2)]];
+  clang_analyzer_dump(a); // expected-warning {{2 S32b}}
+  // expected-warning-re at -1 {{reg_${{[0-9]+}}<int a>}} FIXME: We shouldn't have this dump.
+  switch (a) {
+    case 2:
+      [[fallthrough, assume(b == 30)]];
+    case 4: {
+      clang_analyzer_dump(b); // expected-warning {{30 S32b}}
+      // expected-warning-re at -1 {{reg_${{[0-9]+}}<int b>}} FIXME: We shouldn't have this dump.
+      return b;
+    }
+  }
+  // This code should be unreachable.
+  [[assume(false)]]; // This should definitely make it so.
+  clang_analyzer_dump(33); // expected-warning {{33 S32b}}
+  return 0;
+}
diff --git a/clang/test/Analysis/out-of-bounds-new.cpp b/clang/test/Analysis/out-of-bounds-new.cpp
index 4e5442422bff4..9843a8c3a650e 100644
--- a/clang/test/Analysis/out-of-bounds-new.cpp
+++ b/clang/test/Analysis/out-of-bounds-new.cpp
@@ -1,4 +1,7 @@
-// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -analyzer-checker=unix,core,security.ArrayBound -verify %s
+// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -verify %s \
+// RUN:   -analyzer-checker=unix,core,security.ArrayBound,debug.ExprInspection
+
+void clang_analyzer_eval(bool);
 
 // Tests doing an out-of-bounds access after the end of an array using:
 // - constant integer index
@@ -180,3 +183,58 @@ int test_reference_that_might_be_after_the_end(int idx) {
   return ref;
 }
 
+// From: https://github.com/llvm/llvm-project/issues/100762
+extern int arrOf10[10];
+void using_builtin(int x) {
+  __builtin_assume(x > 101); // CallExpr
+  arrOf10[x] = 404; // expected-warning {{Out of bound access to memory}}
+}
+
+void using_assume_attr(int ax) {
+  [[assume(ax > 100)]]; // NullStmt with an "assume" attribute.
+  arrOf10[ax] = 405; // expected-warning {{Out of bound access to memory}}
+}
+
+void using_many_assume_attr(int yx) {
+  [[assume(yx > 104), assume(yx > 200), assume(yx < 300)]]; // NullStmt with an attribute
+  arrOf10[yx] = 406; // expected-warning{{Out of bound access to memory}}
+}
+
+
+int using_builtin_assume_has_no_sideeffects(int y) {
+  // We should not apply sideeffects of the argument of [[assume(...)]].
+  // "y" should not get incremented;
+  __builtin_assume(++y == 43); // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
+  clang_analyzer_eval(y == 42); // expected-warning {{FALSE}}
+  return y;
+}
+
+
+
+int using_assume_attr_has_no_sideeffects(int y) {
+
+  // We should not apply sideeffects of the argument of [[assume(...)]].
+  // "y" should not get incremented;
+  [[assume(++y == 43)]]; // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
+ 
+  clang_analyzer_eval(y == 42); // expected-warning {{TRUE}} expected-warning {{FALSE}} FIXME: This should be only TRUE.
+
+  clang_analyzer_eval(y == 43); // expected-warning {{FALSE}} expected-warning {{TRUE}} FIXME: This should be only FALSE.
+
+  return y;
+}
+
+
+int using_builtinassume_has_no_sideeffects(int u) {
+  // We should not apply sideeffects of the argument of __builtin_assume(...)
+  // "u" should not get incremented;
+  __builtin_assume(++u == 43); // expected-warning {{assumption is ignored because it contains (potential) side-effects}}
+ 
+  // FIXME: evaluate this to true
+  clang_analyzer_eval(u == 42); // expected-warning {{FALSE}}  current behavior 
+
+  // FIXME: evaluate this to false
+  clang_analyzer_eval(u == 43); // expected-warning {{TRUE}}  current behavior 
+
+  return u;
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/129234


More information about the cfe-commits mailing list